43 Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
const ITensorInfo *mm_input,
const ITensorInfo *mm_weights,
const ITensorInfo *bias,
44 float gemmlowp_scale,
const TensorInfo *mm_res_info,
const TensorInfo *outstage_tensor_info)
53 Status NEQLSTMLayer::validate_layer_norm(
const ITensorInfo &in,
const ITensorInfo &weight,
const ITensorInfo &bias)
64 void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g,
const ITensor *in)
68 Tensor &out = get_layer_norm_output(g);
69 _memory_group.manage(&out);
70 out.allocator()->init(*(in->info()));
72 get_layer_norm(g) = std::make_unique<NEQLSTMLayerNormalizationKernel>();
73 get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
// Destructor of TensorCopyKernel (scoped inside NEQLSTMLayer), defined here
// with the compiler-generated default behaviour.
// NOTE(review): presumably kept out of the class declaration so that the
// header does not need complete member types - confirm against the header.
76 NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() =
default;
92 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
98 Iterator input_iter{ _src, _window };
99 Iterator output_iter{ _dst, _window };
103 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
105 input_iter, output_iter);
111 : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
112 _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
113 _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
114 _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
115 _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
116 _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
117 _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
118 _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
119 _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
120 _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
121 _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
124 _memory_group =
MemoryGroup(std::move(memory_manager));
129 Tensor *mm_res,
Tensor *outstage_res,
float gemmlowp_scale,
132 _memory_group.
manage(mm_res);
133 _memory_group.
manage(outstage_res);
139 mm.
configure(mm_input, mm_weights,
nullptr, mm_res);
143 outstage.
configure(mm_res, bias, outstage_res, gemmlowp_info);
156 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
157 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
165 recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
166 forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(),
167 cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info(), output->
info(),
171 const int num_units = input_to_output_weights->
info()->
dimension(1);
196 set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
197 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
198 set_layer_norm_bias(lstm_params.
input_gate_bias(), LayerNormGate::Input);
199 set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
207 const int32_t cell_shift = log2(qcell_state_in.
scale);
210 int16_t quantized_cell_clip = 0;
215 _has_cell_clipping = quantized_cell_clip > 0;
223 _input_to_input_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
224 _recurrent_to_input_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
229 _input_to_forget_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
230 _recurrent_to_forget_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
231 _input_to_cell_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
232 _recurrent_to_cell_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
233 _input_to_output_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
234 _recurrent_to_output_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
244 _projection_reduction = std::make_unique<cpu::kernels::CpuGemmLowpMatrixAReductionKernel>();
246 if(_projection_bias !=
nullptr)
253 _transpose_input_to_forget_weights.
configure(input_to_forget_weights, &_input_to_forget_weights_transposed);
254 _transpose_input_to_cell_weights.
configure(input_to_cell_weights, &_input_to_cell_weights_transposed);
255 _transpose_input_to_output_weights.
configure(input_to_output_weights, &_input_to_output_weights_transposed);
256 _transpose_recurrent_to_forget_weights.
configure(recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
257 _transpose_recurrent_to_cell_weights.
configure(recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
258 _transpose_recurrent_to_output_weights.
configure(recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
266 _transpose_projection_weights.
configure(_projection_weights, &_projection_weights_transposed);
279 configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
280 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
281 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
282 mm_out_info, forget_gate_outstage_info);
285 configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
286 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
287 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
288 mm_out_info, forget_gate_outstage_info);
290 _accumulate_input_recurrent_forget.
configure(&_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
ConvertPolicy::SATURATE);
296 _memory_group.
manage(&_mul_cell_to_forget_res);
299 _memory_group.
manage(&_cell_to_forget_outstage_res);
302 _cell_to_forget_outstage.
configure(&_mul_cell_to_forget_res,
nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
304 _accumulate_cell_forget.
configure(&_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
ConvertPolicy::SATURATE);
308 Tensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
312 configure_layer_norm(LayerNormGate::Forget, forget_activation_input);
314 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
321 _memory_group.
manage(&_forget_gate);
329 configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
330 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
331 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
332 mm_out_info, cell_outstage_info);
335 configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
336 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
337 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
338 mm_out_info, cell_outstage_info);
340 _accumulate_input_recurrent_modulation.
configure(&_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
ConvertPolicy::SATURATE);
343 Tensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
347 configure_layer_norm(LayerNormGate::Cell, cell_activation_input);
349 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
354 _memory_group.
manage(&_cell_gate);
362 _memory_group.
manage(&_input_gate);
373 configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
374 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
375 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
376 mm_out_info, input_outstage_info);
379 configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
380 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
381 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
382 mm_out_info, input_outstage_info);
383 _accumulate_input_recurrent_input.
configure(&_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
389 _memory_group.
manage(&_mul_cell_to_input_res);
394 _memory_group.
manage(&_cell_to_input_outstage_res);
395 _cell_to_input_outstage.
configure(&_mul_cell_to_input_res,
nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
397 _accumulate_cell_input.
configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
401 Tensor *input_activation_input = &_recurrent_to_input_outstage_res;
405 configure_layer_norm(LayerNormGate::Input, input_activation_input);
407 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
417 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
419 _memory_group.
manage(&_mul_input_cell_res);
426 if(_has_cell_clipping)
433 configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
434 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
435 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
436 mm_out_info, output_outstage_info);
439 configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
440 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
441 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
442 mm_out_info, output_outstage_info);
444 _accumulate_input_recurrent_output.
configure(&_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
ConvertPolicy::SATURATE);
452 _memory_group.
manage(&_mul_cell_to_output_res);
458 _memory_group.
manage(&_cell_to_output_outstage_res);
459 _cell_to_output_outstage.
configure(&_mul_cell_to_output_res,
nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
462 _accumulate_cell_to_output.
configure(&_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
ConvertPolicy::SATURATE);
466 Tensor *output_activation_input = &_recurrent_to_output_outstage_res;
470 configure_layer_norm(LayerNormGate::Output, output_activation_input);
472 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
476 _memory_group.
manage(&_output_gate);
484 _memory_group.
manage(&_hidden_mul_res);
490 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
495 _projection_tensor_copy_required = (num_units !=
output_size);
496 ITensor *hidden_gate_result = output_state_out;
498 _memory_group.
manage(&_hidden_gate);
500 if(_projection_tensor_copy_required)
504 hidden_gate_result = &_hidden_gate;
507 _hidden_outstage.
configure(&_hidden_mul_res,
nullptr, hidden_gate_result, gemmlowp_info);
513 const TensorInfo projection_outstage_info(*output_state_out->
info());
521 TensorInfo projection_mm_out_info{ mm_out_info };
524 configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
525 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
526 &_mm_projection_res, &_projection_outstage_res, projection_scale,
527 projection_mm_out_info, projection_outstage_info);
529 ITensor *accumulate_destination = output_state_out;
531 if(_projection_tensor_copy_required)
536 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
537 accumulate_destination = &_projection_accumulate_res;
543 if(_projection_tensor_copy_required)
545 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
549 int8_t quantized_projection_clip{ 0 };
552 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.
projection_clip() / qprojection.scale, -128, 127);
555 if(quantized_projection_clip > 0)
558 _has_projection_clipping =
true;
563 if(_projection_tensor_copy_required)
565 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
571 _copy_output.
configure(output_state_out, output);
582 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
583 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
584 cell_state_out, output_state_out, output);
590 const unsigned int batch_size = input->
dimension(1);
591 const unsigned int num_units = input_to_output_weights->
dimension(1);
592 const unsigned int output_size = output_state_out->
dimension(_out_state_output_size_dimension_idx);
602 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
643 const int32_t cell_shift = log2(qcell_state_in.
scale);
647 int16_t quantized_cell_clip = 0;
659 -qinput.offset,
true)));
666 -qoutput_state_in.
offset,
true)));
672 -qoutput_state_in.
offset,
true)));
719 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
722 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
754 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
757 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
790 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
793 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
820 if(quantized_cell_clip > 0)
823 quantized_cell_clip)));
829 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
832 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
864 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
870 const bool projection_tensor_copy_required = num_units !=
output_size;
886 const TensorInfo projection_outstage_info(*output_state_out);
889 TensorInfo projection_mm_out_info{ mm_out_info };
892 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
893 &projection_outstage_info));
895 if(projection_tensor_copy_required)
902 if(projection_tensor_copy_required)
907 int8_t quantized_projection_clip{ 0 };
913 if(quantized_projection_clip > 0)
916 quantized_projection_clip)));
921 if(projection_tensor_copy_required)
951 _mm_input_to_forget.
run();
952 _input_to_forget_outstage.
run();
954 _mm_recurrent_to_forget.
run();
955 _recurrent_to_forget_outstage.
run();
956 _accumulate_input_recurrent_forget.
run();
960 _pixelwise_mul_cell_to_forget.
run();
961 _cell_to_forget_outstage.
run();
962 _accumulate_cell_forget.
run();
970 _forget_gate_sigmoid.
run();
973 _mm_input_to_cell.
run();
974 _input_to_cell_outstage.
run();
976 _mm_recurrent_to_cell.
run();
977 _recurrent_to_cell_outstage.
run();
978 _accumulate_input_recurrent_modulation.
run();
985 _cell_gate_tanh.
run();
990 _input_gate_sub.
run();
994 _mm_input_to_input.
run();
995 _input_to_input_outstage.
run();
996 _mm_recurrent_to_input.
run();
997 _recurrent_to_input_outstage.
run();
998 _accumulate_input_recurrent_input.
run();
1002 _pixelwise_mul_cell_to_input.
run();
1003 _cell_to_input_outstage.
run();
1004 _accumulate_cell_input.
run();
1012 _input_gate_sigmoid.
run();
1016 _pixelwise_mul_forget_cell.
run();
1017 _pixelwise_mul_input_cell.
run();
1018 _add_forget_cell.
run();
1020 if(_has_cell_clipping)
1026 _mm_input_to_output.
run();
1027 _input_to_output_outstage.
run();
1028 _mm_recurrent_to_output.
run();
1029 _recurrent_to_output_outstage.
run();
1030 _accumulate_input_recurrent_output.
run();
1033 _pixelwise_mul_cell_to_output.
run();
1034 _cell_to_output_outstage.
run();
1035 _accumulate_cell_to_output.
run();
1043 _output_gate_sigmoid.
run();
1047 _pixelwise_mul_hidden.
run();
1048 _hidden_outstage.
run();
1053 _mm_projection.
run();
1054 _projection_outstage.
run();
1056 if(_projection_tensor_copy_required)
1058 _projection_output_to_accumulate_copy.run();
1061 _accumulate_projection.
run();
1063 if(_projection_tensor_copy_required)
1065 _projection_accumulate_to_output_copy.run();
1068 if(_has_projection_clipping)
1070 _projection_clip.
run();
1075 if(_projection_tensor_copy_required)
1077 _hidden_to_output_copy.run();
1096 _transpose_input_to_forget_weights.
run();
1097 _transpose_input_to_cell_weights.
run();
1098 _transpose_input_to_output_weights.
run();
1099 _transpose_recurrent_to_forget_weights.
run();
1100 _transpose_recurrent_to_cell_weights.
run();
1101 _transpose_recurrent_to_output_weights.
run();
1129 _transpose_input_to_input_weights.
run();
1130 _transpose_recurrent_to_input_weights.
run();
1192 if(_projection_bias !=
nullptr)
1194 _projection_bias_add.
run();
1199 _transpose_projection_weights.
run();
1202 if(!_projection_tensor_copy_required)
1217 _is_prepared =
true;
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const T * projection_weights() const
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
Static function to check if given info will lead to a valid configuration of NEQLSTMLayerNormalizationKernel.
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
const T * input_to_input_weights() const
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Quantize using a fixed point multiplication.
NEQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
quantized, symmetric fixed-point 16-bit number
bool use_layer_norm() const
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
T * forget_layer_norm_weights() const
void run() override
Run the kernels contained in the function.
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor. ...
virtual ITensorInfo & set_tensor_shape(const TensorShape &shape)=0
Set the shape of an already initialized tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
float output_intermediate_scale() const
bool has_cifg_opt() const
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
float cell_intermediate_scale() const
float forget_intermediate_scale() const
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
T * cell_to_input_weights() const
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of NEQLSTMLayer.
auto input_to_cell_weights
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
DataType data_type() const override
Data type used for each element of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
const T * recurrent_to_input_weights() const
int32_t hidden_state_zero() const
auto input_to_output_weights
const T * projection_bias() const
Quantization information.
T * output_layer_norm_weights() const
float input_intermediate_scale() const
void run() override
Run the kernels contained in the function.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const GEMMLowpReductionKernelInfo &info)
Static function to check if given info will lead to a valid configuration.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
~NEQLSTMLayer()
Default destructor.
void run() override
Run the kernels contained in the function.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NECopy.
float hidden_state_scale() const
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
void configure(const ITensor *input, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, const ITensor *cell_state_in, ITensor *output_state_in, ITensor *cell_state_out, ITensor *output_state_out, ITensor *output, const LSTMParams< ITensor > &lstm_params)
Initialize function's tensors.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
auto recurrent_to_cell_weights
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
quantized, symmetric fixed-point 8-bit number
T * cell_to_forget_weights() const
Lower and Upper Bounded Rectifier ( \( f(x) = \min(a, \max(b, x)) \) )
bool has_projection() const
float projection_clip() const
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
T * cell_to_output_weights() const
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplication.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of NEGEMMLowpOutputStage.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
Basic function to execute GEMMLowpQuantizeDown kernels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
const T * input_gate_bias() const
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function.
ITensorInfo & set_tensor_shape(const TensorShape &shape) override
Set the shape of an already initialized tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
T * cell_layer_norm_weights() const
quantized, asymmetric fixed-point 8-bit number signed
im2col_func configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias)
void configure(ITensor *input, ITensor *output)
Initialise the function's source and destination.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
Truncates the least significant values that are lost in operations.
void prepare() override
Prepare the function for executing.
auto input_to_forget_weights
Function to run Gemm on quantized types.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETranspose.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticSubtraction.