Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
                   const ITensorInfo       *mm_input,
                   const ITensorInfo       *mm_weights,
                   const ITensorInfo       *bias,
                   float                    gemmlowp_scale,
                   const TensorInfo        *mm_res_info,
                   const TensorInfo        *outstage_tensor_info)
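// validate_mm() checks one matrix-multiply + output-stage pair of the QLSTM
// pipeline: the GEMMLowp core (mm_input x mm_weights -> mm_res_info, S32) and
// the requantization of that S32 result into the intermediate described by
// outstage_tensor_info. A minimal sketch of what the body is expected to do,
// assuming it uses quantization::calculate_quantized_multiplier() as the rest
// of the library does:
//
//   ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(mm_input, mm_weights, nullptr, mm_res_info));
//   ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(
//       gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
//   ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(mm_res_info, bias, outstage_tensor_info, gemmlowp_info));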
Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
Tensor &out = get_layer_norm_output(g);
_memory_group.manage(&out);
out.allocator()->init(*(in->info()));
get_layer_norm(g) = std::make_unique<NEQLSTMLayerNormalizationKernel>();
get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
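// The layer-norm output tensor is initialised with the same TensorInfo as its
// input and handed to the memory group, so its backing buffer can be reused
// once the gate that consumes it has run.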
NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
_row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
Iterator input_iter{ _src, _window };
Iterator output_iter{ _dst, _window };
execute_window_loop(_window, [&](const Coordinates &) { memcpy(output_iter.ptr(), input_iter.ptr(), _row_size); },
                    input_iter, output_iter);
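// TensorCopyKernel copies one row per window iteration and clamps the copy
// length to the narrower of the two tensors, which is what lets the
// projection path move rows between the num_units-wide accumulator and the
// output_size-wide output state.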
_dequantize_input_to_forget_weights(),
_quantize_input_to_forget_weights(),
_transpose_input_to_forget_weights(),
_transpose_input_to_cell_weights(),
_transpose_input_to_output_weights(),
_transpose_input_to_input_weights(),
_transpose_recurrent_to_forget_weights(),
_transpose_recurrent_to_cell_weights(),
_transpose_recurrent_to_output_weights(),
_transpose_recurrent_to_input_weights(),
_transpose_projection_weights(),
_input_to_input_reduction(),
_recurrent_to_input_reduction(),
_input_to_forget_reduction(),
_recurrent_to_forget_reduction(),
_input_to_cell_reduction(),
_recurrent_to_cell_reduction(),
_input_to_output_reduction(),
_recurrent_to_output_reduction(),
_projection_reduction(),
_projection_bias_add(),
_mm_input_to_forget(),
_mm_recurrent_to_forget(),
_pixelwise_mul_cell_to_forget(),
_input_to_forget_outstage(),
_recurrent_to_forget_outstage(),
_cell_to_forget_outstage(),
_accumulate_input_recurrent_forget(),
_accumulate_cell_forget(),
_forget_gate_sigmoid(),
_input_to_cell_outstage(),
_mm_recurrent_to_cell(),
_recurrent_to_cell_outstage(),
_accumulate_input_recurrent_modulation(),
_mm_input_to_input(),
_input_to_input_outstage(),
_mm_recurrent_to_input(),
_recurrent_to_input_outstage(),
_accumulate_input_recurrent_input(),
_pixelwise_mul_cell_to_input(),
_cell_to_input_outstage(),
_accumulate_cell_input(),
_input_gate_sigmoid(),
_pixelwise_mul_forget_cell(),
_pixelwise_mul_input_cell(),
_mm_input_to_output(),
_input_to_output_outstage(),
_mm_recurrent_to_output(),
_recurrent_to_output_outstage(),
_accumulate_input_recurrent_output(),
_pixelwise_mul_cell_to_output(),
_cell_to_output_outstage(),
_accumulate_cell_to_output(),
_output_gate_sigmoid(),
_pixelwise_mul_hidden(),
_projection_outstage(),
_accumulate_projection(),
_projection_bias_copy(),
_projection_output_to_accumulate_copy(),
_projection_accumulate_to_output_copy(),
_hidden_to_output_copy(),
_layer_norm_weights(),
_memory_group = MemoryGroup(std::move(memory_manager));
Tensor *mm_res,
Tensor *outstage_res,
float gemmlowp_scale,
_memory_group.manage(mm_res);
_memory_group.manage(outstage_res);
mm.configure(mm_input, mm_weights, nullptr, mm_res);
outstage.configure(mm_res, bias, outstage_res, gemmlowp_info);
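// Every gate follows this same configure_mm() pattern: run the GEMMLowp core
// into an S32 scratch tensor (mm_res), then requantize it through an output
// stage into the 16-bit intermediate (outstage_res) whose effective scale is
// gemmlowp_scale. Both scratch tensors are placed under memory-group
// management so their buffers can be recycled between gates.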
recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
_convert_input_to_forget_weights_to_qsymm8 = true;
_dequantize_input_to_forget_weights.configure(input_to_forget_weights, &_input_to_forget_weights_f32);
_quantize_input_to_forget_weights.configure(&_input_to_forget_weights_f32, &_input_to_forget_weights_symm8);
recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(),
recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(),
const int num_units = input_to_output_weights->info()->dimension(1);
set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
set_layer_norm_bias(lstm_params.input_gate_bias(), LayerNormGate::Input);
set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
const int32_t cell_shift = log2(qcell_state_in.scale);
int16_t quantized_cell_clip = 0;
_has_cell_clipping = quantized_cell_clip > 0;
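// The listing elides the quantization of the clip value itself. A sketch of
// the elided step, assuming the non-zero case quantizes lstm_params.cell_clip()
// against the cell state's QSYMM16 quantization info:
//
//   if(lstm_params.cell_clip() > 0.0f)
//   {
//       quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
//   }
//
// so clipping is enabled only when the quantized threshold comes out positive.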
_input_to_input_reduction     = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_recurrent_to_input_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_input_to_forget_reduction     = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_recurrent_to_forget_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_input_to_cell_reduction       = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_recurrent_to_cell_reduction   = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_input_to_output_reduction     = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_recurrent_to_output_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
_projection_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
if(_projection_bias != nullptr)
_transpose_input_to_forget_weights.configure(input_to_forget_weights, &_input_to_forget_weights_transposed);
_transpose_input_to_cell_weights.configure(input_to_cell_weights, &_input_to_cell_weights_transposed);
_transpose_input_to_output_weights.configure(input_to_output_weights, &_input_to_output_weights_transposed);
_transpose_recurrent_to_forget_weights.configure(recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
_transpose_recurrent_to_cell_weights.configure(recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
_transpose_recurrent_to_output_weights.configure(recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
_transpose_projection_weights.configure(_projection_weights, &_projection_weights_transposed);
configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
             input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
             &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
             mm_out_info, forget_gate_outstage_info);
configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
             output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
             &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
             mm_out_info, forget_gate_outstage_info);
_accumulate_input_recurrent_forget.configure(&_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
                                             ConvertPolicy::SATURATE);
_memory_group.manage(&_mul_cell_to_forget_res);
_memory_group.manage(&_cell_to_forget_outstage_res);
_cell_to_forget_outstage.configure(&_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
_accumulate_cell_forget.configure(&_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
                                  ConvertPolicy::SATURATE);
Tensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
configure_layer_norm(LayerNormGate::Forget, forget_activation_input);
forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
_memory_group.manage(&_forget_gate);
configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
             input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
             &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
             mm_out_info, cell_outstage_info);
configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
             output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
             &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
             mm_out_info, cell_outstage_info);
_accumulate_input_recurrent_modulation.configure(&_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
                                                 ConvertPolicy::SATURATE);
Tensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
configure_layer_norm(LayerNormGate::Cell, cell_activation_input);
cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
_memory_group.manage(&_cell_gate);
_memory_group.manage(&_input_gate);
configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
             input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
             &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
             mm_out_info, input_outstage_info);
configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
             output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
             &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
             mm_out_info, input_outstage_info);
_accumulate_input_recurrent_input.configure(&_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
                                            ConvertPolicy::SATURATE);
_memory_group.manage(&_mul_cell_to_input_res);
_memory_group.manage(&_cell_to_input_outstage_res);
_cell_to_input_outstage.configure(&_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
_accumulate_cell_input.configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res,
                                 ConvertPolicy::SATURATE);
Tensor *input_activation_input = &_recurrent_to_input_outstage_res;
configure_layer_norm(LayerNormGate::Input, input_activation_input);
input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
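// Worked through, assuming the cell gate leaves its tanh activation as Q0.15
// (scale 2^-15), as the QSYMM16 convention elsewhere in this function
// suggests: the expression collapses to
//   mul_input_cell_scale = 2^-15 * 2^(15 + cell_shift) = 2^cell_shift,
// i.e. the input x cell product is declared at the cell state's own QSYMM16
// scale, so the later add with forget_gate * cell_state_in can write straight
// into cell_state_out without a further rescale.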
_memory_group.manage(&_mul_input_cell_res);
if(_has_cell_clipping)
configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
             input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
             &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
             mm_out_info, output_outstage_info);
configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
             output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
             &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
             mm_out_info, output_outstage_info);
_accumulate_input_recurrent_output.configure(&_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
                                             ConvertPolicy::SATURATE);
_memory_group.manage(&_mul_cell_to_output_res);
_memory_group.manage(&_cell_to_output_outstage_res);
_cell_to_output_outstage.configure(&_mul_cell_to_output_res, nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
_accumulate_cell_to_output.configure(&_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
                                     ConvertPolicy::SATURATE);
Tensor *output_activation_input = &_recurrent_to_output_outstage_res;
configure_layer_norm(LayerNormGate::Output, output_activation_input);
output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
_memory_group.manage(&_output_gate);
_memory_group.manage(&_hidden_mul_res);
const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
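// Worked through: the output gate (post-sigmoid) and the tanh of the cell
// state are both Q0.15, so their product carries a true scale of
// 2^-15 * 2^-15 = 2^-30. Dividing by the requested hidden-state scale gives
// the effective multiplier for the hidden output stage:
//   hidden_state_scale = 2^-30 / lstm_params.hidden_state_scale()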
_projection_tensor_copy_required = (num_units != output_size);
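// When the projection maps num_units down to output_size (or vice versa) the
// intermediate tensors and the output state disagree on row width, so the
// TensorCopyKernel defined above shuttles rows between them; this flag
// records whether that extra copy step is needed.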
ITensor *hidden_gate_result = output_state_out;
_memory_group.manage(&_hidden_gate);
if(_projection_tensor_copy_required)
hidden_gate_result = &_hidden_gate;
_hidden_outstage.configure(&_hidden_mul_res, nullptr, hidden_gate_result, gemmlowp_info);
const TensorInfo projection_outstage_info(*output_state_out->info());
TensorInfo projection_mm_out_info{ mm_out_info };
configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
             hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
             &_mm_projection_res, &_projection_outstage_res, projection_scale,
             projection_mm_out_info, projection_outstage_info);
ITensor *accumulate_destination = output_state_out;
if(_projection_tensor_copy_required)
_projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
accumulate_destination = &_projection_accumulate_res;
if(_projection_tensor_copy_required)
_projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
int8_t quantized_projection_clip{ 0 };
quantized_projection_clip = utility::clamp<int8_t>(lstm_params.projection_clip() / qprojection.scale, -128, 127);
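// Illustrative (hypothetical) numbers: with qprojection.scale = 0.007874 and
// a projection clip of 0.8, 0.8 / 0.007874 ~= 101.6 clamps to the int8_t
// value 101; any clip larger than 127 * scale would simply saturate to 127.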
if(quantized_projection_clip > 0)
_has_projection_clipping = true;
if(_projection_tensor_copy_required)
_hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
_copy_output.configure(output_state_out, output);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
                                    recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
                                    cell_state_out, output_state_out, output);
const unsigned int batch_size  = input->dimension(1);
const unsigned int num_units   = input_to_output_weights->dimension(1);
const unsigned int output_size = output_state_out->dimension(_out_state_output_size_dimension_idx);
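// Shape convention, assuming the library's usual 2D layout for these tensors:
// dimension(0) runs along a row (input_size for the input, output_size for
// the output state), while dimension(1) is the batch for activations and the
// number of cells for weight matrices, which is why the first two reads
// above both use index 1.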
recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
const int32_t cell_shift = log2(qcell_state_in.scale);
int16_t quantized_cell_clip = 0;
-qinput.offset, true)));
-qoutput_state_in.offset, true)));
-qoutput_state_in.offset, true)));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
if(quantized_cell_clip > 0)
quantized_cell_clip)));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
const bool projection_tensor_copy_required = num_units != output_size;
const TensorInfo projection_outstage_info(*output_state_out);
TensorInfo projection_mm_out_info{ mm_out_info };
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
                                        &projection_outstage_info));
if(projection_tensor_copy_required)
if(projection_tensor_copy_required)
int8_t quantized_projection_clip{ 0 };
if(quantized_projection_clip > 0)
quantized_projection_clip)));
if(projection_tensor_copy_required)
_mm_input_to_forget.run();
_input_to_forget_outstage.run();
_mm_recurrent_to_forget.run();
_recurrent_to_forget_outstage.run();
_accumulate_input_recurrent_forget.run();
_pixelwise_mul_cell_to_forget.run();
_cell_to_forget_outstage.run();
_accumulate_cell_forget.run();
_forget_gate_sigmoid.run();
_mm_input_to_cell.run();
_input_to_cell_outstage.run();
_mm_recurrent_to_cell.run();
_recurrent_to_cell_outstage.run();
_accumulate_input_recurrent_modulation.run();
_cell_gate_tanh.run();
_input_gate_sub.run();
_mm_input_to_input.run();
_input_to_input_outstage.run();
_mm_recurrent_to_input.run();
_recurrent_to_input_outstage.run();
_accumulate_input_recurrent_input.run();
_pixelwise_mul_cell_to_input.run();
_cell_to_input_outstage.run();
_accumulate_cell_input.run();
_input_gate_sigmoid.run();
_pixelwise_mul_forget_cell.run();
_pixelwise_mul_input_cell.run();
_add_forget_cell.run();
if(_has_cell_clipping)
_mm_input_to_output.run();
_input_to_output_outstage.run();
_mm_recurrent_to_output.run();
_recurrent_to_output_outstage.run();
_accumulate_input_recurrent_output.run();
_pixelwise_mul_cell_to_output.run();
_cell_to_output_outstage.run();
_accumulate_cell_to_output.run();
_output_gate_sigmoid.run();
_pixelwise_mul_hidden.run();
_hidden_outstage.run();
_mm_projection.run();
_projection_outstage.run();
if(_projection_tensor_copy_required)
_projection_output_to_accumulate_copy.run();
_accumulate_projection.run();
if(_projection_tensor_copy_required)
_projection_accumulate_to_output_copy.run();
if(_has_projection_clipping)
_projection_clip.run();
if(_projection_tensor_copy_required)
_hidden_to_output_copy.run();
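// The fragments below belong to prepare(): one-shot weight transformations
// (requantization, transposes, effective-bias reductions) that run on the
// first invocation and are skipped afterwards via _is_prepared.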
if(_convert_input_to_forget_weights_to_qsymm8)
_dequantize_input_to_forget_weights.run();
_quantize_input_to_forget_weights.run();
_transpose_input_to_forget_weights.run();
_transpose_input_to_cell_weights.run();
_transpose_input_to_output_weights.run();
_transpose_recurrent_to_forget_weights.run();
_transpose_recurrent_to_cell_weights.run();
_transpose_recurrent_to_output_weights.run();
_transpose_input_to_input_weights.run();
_transpose_recurrent_to_input_weights.run();
if(_projection_bias != nullptr)
_projection_bias_add.run();
_transpose_projection_weights.run();
if(!_projection_tensor_copy_required)
_is_prepared = true;
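// A minimal usage sketch, under assumed tensor shapes and quantization
// (illustrative only; a real model must satisfy the validate() constraints,
// and the optional CIFG/peephole/projection/layer-norm tensors are wired
// through the LSTMParams setters):
//
//   NEQLSTMLayer lstm;                        // optionally pass a memory manager
//   LSTMParams<ITensor> params;
//   // ... populate params with the optional gate tensors as needed ...
//   lstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
//                  &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
//                  &forget_gate_bias, &cell_bias, &output_gate_bias,
//                  &cell_state_in, &output_state_in, &cell_state_out, &output_state_out,
//                  &output, params);
//   lstm.run();                               // prepare() is invoked on the first run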