48 Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
const ITensorInfo *mm_input,
const ITensorInfo *mm_weights,
const ITensorInfo *bias,
49 float gemmlowp_scale,
const TensorInfo *mm_res_info,
const TensorInfo *outstage_tensor_info)
58 Status NEQLSTMLayer::validate_layer_norm(
const ITensorInfo &in,
const ITensorInfo &weight,
const ITensorInfo &bias)
69 void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g,
const ITensor *in)
73 Tensor &out = get_layer_norm_output(g);
74 _memory_group.manage(&out);
75 out.allocator()->init(*(in->info()));
77 get_layer_norm(g) = std::make_unique<NEQLSTMLayerNormalizationKernel>();
78 get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
81 NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() =
default;
92 void NEQLSTMLayer::TensorCopyKernel::configure(ITensor &
src, ITensor &
dst)
97 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
103 Iterator input_iter{ _src, _window };
104 Iterator output_iter{ _dst, _window };
108 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
110 input_iter, output_iter);
116 : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
117 _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
118 _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
119 _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
120 _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
121 _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
122 _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
123 _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
124 _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
125 _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
126 _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
129 _memory_group =
MemoryGroup(std::move(memory_manager));
134 Tensor *mm_res,
Tensor *outstage_res,
float gemmlowp_scale,
137 _memory_group.
manage(mm_res);
138 _memory_group.
manage(outstage_res);
144 mm.
configure(mm_input, mm_weights,
nullptr, mm_res);
148 outstage.
configure(mm_res, bias, outstage_res, gemmlowp_info);
172 cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info(), output->
info(),
175 const int batch_size =
input->info()->dimension(1);
202 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
203 set_layer_norm_bias(lstm_params.
input_gate_bias(), LayerNormGate::Input);
212 const int32_t cell_shift = log2(qcell_state_in.
scale);
215 int16_t quantized_cell_clip = 0;
220 _has_cell_clipping = quantized_cell_clip > 0;
228 _input_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
229 _recurrent_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
231 _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias,
GEMMLowpReductionKernelInfo(num_units,
false, -qoutput_state_in.
offset,
true));
234 _input_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
235 _recurrent_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
236 _input_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
237 _recurrent_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
238 _input_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
239 _recurrent_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
249 _projection_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
251 if(_projection_bias !=
nullptr)
271 _transpose_projection_weights.
configure(_projection_weights, &_projection_weights_transposed);
284 configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
285 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
286 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
287 mm_out_info, forget_gate_outstage_info);
290 configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
291 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
292 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
293 mm_out_info, forget_gate_outstage_info);
295 _accumulate_input_recurrent_forget.
configure(&_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
ConvertPolicy::SATURATE);
301 _memory_group.
manage(&_mul_cell_to_forget_res);
304 _memory_group.
manage(&_cell_to_forget_outstage_res);
307 _cell_to_forget_outstage.
configure(&_mul_cell_to_forget_res,
nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
309 _accumulate_cell_forget.
configure(&_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
ConvertPolicy::SATURATE);
313 Tensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
317 configure_layer_norm(LayerNormGate::Forget, forget_activation_input);
319 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
326 _memory_group.
manage(&_forget_gate);
334 configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
335 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
336 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
337 mm_out_info, cell_outstage_info);
340 configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
341 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
342 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
343 mm_out_info, cell_outstage_info);
345 _accumulate_input_recurrent_modulation.
configure(&_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
ConvertPolicy::SATURATE);
348 Tensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
352 configure_layer_norm(LayerNormGate::Cell, cell_activation_input);
354 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
359 _memory_group.
manage(&_cell_gate);
367 _memory_group.
manage(&_input_gate);
378 configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
379 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
380 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
381 mm_out_info, input_outstage_info);
384 configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
385 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
386 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
387 mm_out_info, input_outstage_info);
388 _accumulate_input_recurrent_input.
configure(&_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
394 _memory_group.
manage(&_mul_cell_to_input_res);
399 _memory_group.
manage(&_cell_to_input_outstage_res);
400 _cell_to_input_outstage.
configure(&_mul_cell_to_input_res,
nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
402 _accumulate_cell_input.
configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
406 Tensor *input_activation_input = &_recurrent_to_input_outstage_res;
410 configure_layer_norm(LayerNormGate::Input, input_activation_input);
412 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
422 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
424 _memory_group.
manage(&_mul_input_cell_res);
431 if(_has_cell_clipping)
438 configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
439 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
440 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
441 mm_out_info, output_outstage_info);
444 configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
445 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
446 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
447 mm_out_info, output_outstage_info);
449 _accumulate_input_recurrent_output.
configure(&_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
ConvertPolicy::SATURATE);
457 _memory_group.
manage(&_mul_cell_to_output_res);
463 _memory_group.
manage(&_cell_to_output_outstage_res);
464 _cell_to_output_outstage.
configure(&_mul_cell_to_output_res,
nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
467 _accumulate_cell_to_output.
configure(&_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
ConvertPolicy::SATURATE);
471 Tensor *output_activation_input = &_recurrent_to_output_outstage_res;
475 configure_layer_norm(LayerNormGate::Output, output_activation_input);
477 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
481 _memory_group.
manage(&_output_gate);
489 _memory_group.
manage(&_hidden_mul_res);
495 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
500 _projection_tensor_copy_required = (num_units !=
output_size);
501 ITensor *hidden_gate_result = output_state_out;
503 _memory_group.
manage(&_hidden_gate);
505 if(_projection_tensor_copy_required)
509 hidden_gate_result = &_hidden_gate;
512 _hidden_outstage.
configure(&_hidden_mul_res,
nullptr, hidden_gate_result, gemmlowp_info);
518 const TensorInfo projection_outstage_info(*output_state_out->
info());
526 TensorInfo projection_mm_out_info{ mm_out_info };
529 configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
530 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
531 &_mm_projection_res, &_projection_outstage_res, projection_scale,
532 projection_mm_out_info, projection_outstage_info);
534 ITensor *accumulate_destination = output_state_out;
536 if(_projection_tensor_copy_required)
541 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
542 accumulate_destination = &_projection_accumulate_res;
548 if(_projection_tensor_copy_required)
550 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
554 int8_t quantized_projection_clip{ 0 };
557 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.
projection_clip() / qprojection.scale, -128, 127);
560 if(quantized_projection_clip > 0)
563 _has_projection_clipping =
true;
568 if(_projection_tensor_copy_required)
570 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
576 _copy_output.
configure(output_state_out, output);
589 cell_state_out, output_state_out, output);
595 const unsigned int batch_size =
input->dimension(1);
597 const unsigned int output_size = output_state_out->
dimension(_out_state_output_size_dimension_idx);
648 const int32_t cell_shift = log2(qcell_state_in.
scale);
652 int16_t quantized_cell_clip = 0;
719 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
722 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
754 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
757 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
790 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
793 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
820 if(quantized_cell_clip > 0)
823 quantized_cell_clip)));
829 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
832 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
864 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
870 const bool projection_tensor_copy_required = num_units !=
output_size;
886 const TensorInfo projection_outstage_info(*output_state_out);
889 TensorInfo projection_mm_out_info{ mm_out_info };
892 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
893 &projection_outstage_info));
895 if(projection_tensor_copy_required)
902 if(projection_tensor_copy_required)
907 int8_t quantized_projection_clip{ 0 };
913 if(quantized_projection_clip > 0)
916 quantized_projection_clip)));
921 if(projection_tensor_copy_required)
951 _mm_input_to_forget.
run();
952 _input_to_forget_outstage.
run();
954 _mm_recurrent_to_forget.
run();
955 _recurrent_to_forget_outstage.
run();
956 _accumulate_input_recurrent_forget.
run();
960 _pixelwise_mul_cell_to_forget.
run();
961 _cell_to_forget_outstage.
run();
962 _accumulate_cell_forget.
run();
970 _forget_gate_sigmoid.
run();
973 _mm_input_to_cell.
run();
974 _input_to_cell_outstage.
run();
976 _mm_recurrent_to_cell.
run();
977 _recurrent_to_cell_outstage.
run();
978 _accumulate_input_recurrent_modulation.
run();
985 _cell_gate_tanh.
run();
990 _input_gate_sub.
run();
994 _mm_input_to_input.
run();
995 _input_to_input_outstage.
run();
996 _mm_recurrent_to_input.
run();
997 _recurrent_to_input_outstage.
run();
998 _accumulate_input_recurrent_input.
run();
1002 _pixelwise_mul_cell_to_input.
run();
1003 _cell_to_input_outstage.
run();
1004 _accumulate_cell_input.
run();
1012 _input_gate_sigmoid.
run();
1016 _pixelwise_mul_forget_cell.
run();
1017 _pixelwise_mul_input_cell.
run();
1018 _add_forget_cell.
run();
1020 if(_has_cell_clipping)
1026 _mm_input_to_output.
run();
1027 _input_to_output_outstage.
run();
1028 _mm_recurrent_to_output.
run();
1029 _recurrent_to_output_outstage.
run();
1030 _accumulate_input_recurrent_output.
run();
1033 _pixelwise_mul_cell_to_output.
run();
1034 _cell_to_output_outstage.
run();
1035 _accumulate_cell_to_output.
run();
1043 _output_gate_sigmoid.
run();
1047 _pixelwise_mul_hidden.
run();
1048 _hidden_outstage.
run();
1053 _mm_projection.
run();
1054 _projection_outstage.
run();
1056 if(_projection_tensor_copy_required)
1058 _projection_output_to_accumulate_copy.run();
1061 _accumulate_projection.
run();
1063 if(_projection_tensor_copy_required)
1065 _projection_accumulate_to_output_copy.run();
1068 if(_has_projection_clipping)
1070 _projection_clip.
run();
1075 if(_projection_tensor_copy_required)
1077 _hidden_to_output_copy.run();
1096 _transpose_input_to_forget_weights.
run();
1097 _transpose_input_to_cell_weights.
run();
1098 _transpose_input_to_output_weights.
run();
1099 _transpose_recurrent_to_forget_weights.
run();
1100 _transpose_recurrent_to_cell_weights.
run();
1101 _transpose_recurrent_to_output_weights.
run();
1117 _transpose_input_to_input_weights.
run();
1118 _transpose_recurrent_to_input_weights.
run();
1139 if(_projection_bias !=
nullptr)
1141 _projection_bias_add.
run();
1146 _transpose_projection_weights.
run();
1149 if(!_projection_tensor_copy_required)
1164 _is_prepared =
true;
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const T * projection_weights() const
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
Static function to check if given info will lead to a valid configuration of NEQLSTMLayerNormalizatio...
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
const T * input_to_input_weights() const
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Quantize using a fixed point multiplication.
NEQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
void run() override final
Run the kernels contained in the function.
quantized, symmetric fixed-point 16-bit number
bool use_layer_norm() const
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be differen...
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor.
virtual ITensorInfo & set_tensor_shape(const TensorShape &shape)=0
Set the shape of an already initialized tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
float output_intermediate_scale() const
bool has_cifg_opt() const
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
float cell_intermediate_scale() const
float forget_intermediate_scale() const
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
T * cell_to_input_weights() const
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of NEQLSTMLayer.
auto input_to_cell_weights
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
DataType data_type() const override
Data type used for each element of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
const T * recurrent_to_input_weights() const
int32_t hidden_state_zero() const
auto input_to_output_weights
const T * projection_bias() const
Quantization information.
T * output_layer_norm_weights() const
float input_intermediate_scale() const
void run() override
Run the kernels contained in the function.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
~NEQLSTMLayer()
Default destructor.
void run() override
Run the kernels contained in the function.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NECopy.
float hidden_state_scale() const
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
void configure(const ITensor *input, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, const ITensor *cell_state_in, ITensor *output_state_in, ITensor *cell_state_out, ITensor *output_state_out, ITensor *output, const LSTMParams< ITensor > &lstm_params)
Initialize function's tensors.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
auto recurrent_to_cell_weights
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
quantized, symmetric fixed-point 8-bit number
T * cell_to_forget_weights() const
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
bool has_projection() const
float projection_clip() const
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
T * cell_to_output_weights() const
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplicatio...
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of NEGEMMLowpOutputStage.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
Basic function to execute GEMMLowpQuantizeDown kernels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
const T * input_gate_bias() const
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
ITensorInfo & set_tensor_shape(const TensorShape &shape) override
Set the shape of an already initialized tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiply...
T * cell_layer_norm_weights() const
quantized, asymmetric fixed-point 8-bit number signed
void configure(ITensor *input, ITensor *output)
Initialise the function's source and destination.
static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info)
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixAReducti...
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
Truncates the least significant values that are lost in operations.
void prepare() override
Prepare the function for executing.
auto input_to_forget_weights
Basic function to execute GEMMLowpMatrixMultiplyCore.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETranspose.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticSubtraction.