44 Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
const ITensorInfo *mm_input,
const ITensorInfo *mm_weights,
const ITensorInfo *bias,
45 float gemmlowp_scale,
const TensorInfo *mm_res_info,
const TensorInfo *outstage_tensor_info)
68 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
79 Iterator input_iter{ _src, _window };
80 Iterator output_iter{ _dst, _window };
84 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
86 input_iter, output_iter);
105 for(
auto &norm : _layer_norms)
107 norm = std::make_unique<CLQLSTMLayerNormalizationKernel>();
110 _memory_group =
MemoryGroup(std::move(memory_manager));
115 void CLQLSTMLayer::configure_layer_norm(LayerNormGate g,
const ICLTensor *in)
119 CLTensor *out = &get_layer_norm_output(g);
120 _memory_group.
manage(out);
123 get_layer_norm(g).
configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
142 _memory_group.
manage(mm_res);
143 _memory_group.
manage(outstage_res);
149 mm.
configure(compile_context, mm_input, mm_weights,
nullptr, mm_res);
153 outstage.
configure(compile_context, mm_res, bias, outstage_res, gemmlowp_info);
166 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias,
167 cell_state_in, output_state_in, cell_state_out, output_state_out, output, lstm_params);
179 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
180 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
181 cell_state_out, output_state_out, output);
189 recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
190 forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(),
191 cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info(), output->
info(),
195 const int num_units = input_to_output_weights->
info()->
dimension(1);
220 set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
221 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
222 set_layer_norm_bias(lstm_params.
input_gate_bias(), LayerNormGate::Input);
223 set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
231 const int32_t cell_shift = log2(qcell_state_in.
scale);
234 int16_t quantized_cell_clip = 0;
239 _has_cell_clipping = quantized_cell_clip > 0;
248 _recurrent_to_input_reduction->configure(compile_context, _recurrent_to_input_weights->
info(), _recurrent_to_input_eff_bias.
info(),
GEMMLowpReductionKernelInfo(num_units,
false,
249 -qoutput_state_in.
offset,
true));
252 _recurrent_to_forget_reduction->configure(compile_context, recurrent_to_forget_weights->
info(), _recurrent_to_forget_eff_bias.
info(),
GEMMLowpReductionKernelInfo(num_units,
false,
253 -qoutput_state_in.
offset,
true));
258 _recurrent_to_output_reduction->configure(compile_context, recurrent_to_output_weights->
info(), _recurrent_to_output_eff_bias.
info(),
GEMMLowpReductionKernelInfo(num_units,
false,
259 -qoutput_state_in.
offset,
true));
263 if(_projection_bias !=
nullptr)
270 _transpose_input_to_forget_weights.
configure(compile_context, input_to_forget_weights, &_input_to_forget_weights_transposed);
271 _transpose_input_to_cell_weights.
configure(compile_context, input_to_cell_weights, &_input_to_cell_weights_transposed);
272 _transpose_input_to_output_weights.
configure(compile_context, input_to_output_weights, &_input_to_output_weights_transposed);
273 _transpose_recurrent_to_forget_weights.
configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
274 _transpose_recurrent_to_cell_weights.
configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
275 _transpose_recurrent_to_output_weights.
configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
283 _transpose_projection_weights.
configure(compile_context, _projection_weights, &_projection_weights_transposed);
296 configure_mm(compile_context, _mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
297 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
298 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
299 mm_out_info, forget_gate_outstage_info);
302 configure_mm(compile_context, _mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
303 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
304 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
305 mm_out_info, forget_gate_outstage_info);
307 _accumulate_input_recurrent_forget.
configure(compile_context, &_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
314 _memory_group.
manage(&_mul_cell_to_forget_res);
317 _memory_group.
manage(&_cell_to_forget_outstage_res);
320 _cell_to_forget_outstage.
configure(compile_context, &_mul_cell_to_forget_res,
nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
322 _accumulate_cell_forget.
configure(compile_context, &_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
327 CLTensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
331 configure_layer_norm(LayerNormGate::Forget, &_recurrent_to_forget_outstage_res);
333 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
340 _memory_group.
manage(&_forget_gate);
348 configure_mm(compile_context, _mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
349 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
350 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
351 mm_out_info, cell_outstage_info);
354 configure_mm(compile_context, _mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
355 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
356 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
357 mm_out_info, cell_outstage_info);
359 _accumulate_input_recurrent_modulation.
configure(compile_context, &_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
363 CLTensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
367 configure_layer_norm(LayerNormGate::Cell, &_recurrent_to_cell_outstage_res);
369 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
373 _memory_group.
manage(&_cell_gate);
381 _memory_group.
manage(&_input_gate);
392 configure_mm(compile_context, _mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
393 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
394 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
395 mm_out_info, input_outstage_info);
398 configure_mm(compile_context, _mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
399 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
400 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
401 mm_out_info, input_outstage_info);
402 _accumulate_input_recurrent_input.
configure(compile_context, &_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
409 _memory_group.
manage(&_mul_cell_to_input_res);
414 _memory_group.
manage(&_cell_to_input_outstage_res);
415 _cell_to_input_outstage.
configure(compile_context, &_mul_cell_to_input_res,
nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
417 _accumulate_cell_input.
configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
421 CLTensor *input_activation_input = &_recurrent_to_input_outstage_res;
425 configure_layer_norm(LayerNormGate::Input, &_recurrent_to_input_outstage_res);
427 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
437 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
439 _memory_group.
manage(&_mul_input_cell_res);
446 if(_has_cell_clipping)
453 configure_mm(compile_context, _mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
454 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
455 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
456 mm_out_info, output_outstage_info);
459 configure_mm(compile_context, _mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
460 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
461 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
462 mm_out_info, output_outstage_info);
464 _accumulate_input_recurrent_output.
configure(compile_context, &_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
473 _memory_group.
manage(&_mul_cell_to_output_res);
479 _memory_group.
manage(&_cell_to_output_outstage_res);
480 _cell_to_output_outstage.
configure(compile_context, &_mul_cell_to_output_res,
nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
483 _accumulate_cell_to_output.
configure(compile_context, &_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
488 CLTensor *output_activation_input = &_recurrent_to_output_outstage_res;
492 configure_layer_norm(LayerNormGate::Output, &_recurrent_to_output_outstage_res);
494 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
498 _memory_group.
manage(&_output_gate);
506 _memory_group.
manage(&_hidden_mul_res);
512 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
517 _projection_tensor_copy_required = (num_units !=
output_size);
518 ICLTensor *hidden_gate_result = output_state_out;
520 _memory_group.
manage(&_hidden_gate);
522 if(_projection_tensor_copy_required)
526 hidden_gate_result = &_hidden_gate;
529 _hidden_outstage.
configure(compile_context, &_hidden_mul_res,
nullptr, hidden_gate_result, gemmlowp_info);
535 const TensorInfo projection_outstage_info(*output_state_out->
info());
543 TensorInfo projection_mm_out_info{ mm_out_info };
546 configure_mm(compile_context, _mm_projection, _projection_outstage, gemmlowp_info,
547 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
548 &_mm_projection_res, &_projection_outstage_res, projection_scale,
549 projection_mm_out_info, projection_outstage_info);
551 ICLTensor *accumulate_destination = output_state_out;
553 if(_projection_tensor_copy_required)
558 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
559 accumulate_destination = &_projection_accumulate_res;
565 if(_projection_tensor_copy_required)
567 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
571 int8_t quantized_projection_clip{ 0 };
574 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.
projection_clip() / qprojection.scale, -128, 127);
577 if(quantized_projection_clip > 0)
580 quantized_projection_clip));
581 _has_projection_clipping =
true;
586 if(_projection_tensor_copy_required)
588 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
594 _copy_output.
configure(compile_context, output_state_out, output);
605 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
606 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
607 cell_state_out, output_state_out, output);
613 const unsigned int batch_size = input->
dimension(1);
614 const unsigned int num_units = input_to_output_weights->
dimension(1);
615 const unsigned int output_size = output_state_out->
dimension(_out_state_output_size_dimension_idx);
625 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
666 const int32_t cell_shift = log2(qcell_state_in.
scale);
670 int16_t quantized_cell_clip = 0;
738 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
741 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
773 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
776 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &input_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
809 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
812 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
839 if(quantized_cell_clip > 0)
842 quantized_cell_clip)));
848 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
851 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
883 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
889 const bool projection_tensor_copy_required = num_units !=
output_size;
905 const TensorInfo projection_outstage_info(*output_state_out);
908 TensorInfo projection_mm_out_info{ mm_out_info };
911 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
912 &projection_outstage_info));
914 if(projection_tensor_copy_required)
921 if(projection_tensor_copy_required)
926 int8_t quantized_projection_clip{ 0 };
932 if(quantized_projection_clip > 0)
935 quantized_projection_clip)));
940 if(projection_tensor_copy_required)
970 _mm_input_to_forget.
run();
971 _input_to_forget_outstage.
run();
973 _mm_recurrent_to_forget.
run();
974 _recurrent_to_forget_outstage.
run();
975 _accumulate_input_recurrent_forget.
run();
979 _pixelwise_mul_cell_to_forget.
run();
980 _cell_to_forget_outstage.
run();
981 _accumulate_cell_forget.
run();
989 _forget_gate_sigmoid.
run();
992 _mm_input_to_cell.
run();
993 _input_to_cell_outstage.
run();
995 _mm_recurrent_to_cell.
run();
996 _recurrent_to_cell_outstage.
run();
997 _accumulate_input_recurrent_modulation.
run();
1004 _cell_gate_tanh.
run();
1009 _input_gate_sub.
run();
1013 _mm_input_to_input.
run();
1014 _input_to_input_outstage.
run();
1015 _mm_recurrent_to_input.
run();
1016 _recurrent_to_input_outstage.
run();
1017 _accumulate_input_recurrent_input.
run();
1021 _pixelwise_mul_cell_to_input.
run();
1022 _cell_to_input_outstage.
run();
1023 _accumulate_cell_input.
run();
1031 _input_gate_sigmoid.
run();
1035 _pixelwise_mul_forget_cell.
run();
1036 _pixelwise_mul_input_cell.
run();
1037 _add_forget_cell.
run();
1038 if(_has_cell_clipping)
1044 _mm_input_to_output.
run();
1045 _input_to_output_outstage.
run();
1046 _mm_recurrent_to_output.
run();
1047 _recurrent_to_output_outstage.
run();
1048 _accumulate_input_recurrent_output.
run();
1051 _pixelwise_mul_cell_to_output.
run();
1052 _cell_to_output_outstage.
run();
1053 _accumulate_cell_to_output.
run();
1061 _output_gate_sigmoid.
run();
1065 _pixelwise_mul_hidden.
run();
1066 _hidden_outstage.
run();
1071 _mm_projection.
run();
1072 _projection_outstage.
run();
1074 if(_projection_tensor_copy_required)
1076 _projection_output_to_accumulate_copy.run();
1079 _accumulate_projection.
run();
1081 if(_projection_tensor_copy_required)
1083 _projection_accumulate_to_output_copy.run();
1086 if(_has_projection_clipping)
1088 _projection_clip.
run();
1093 if(_projection_tensor_copy_required)
1095 _hidden_to_output_copy.run();
1114 _transpose_input_to_forget_weights.
run();
1115 _transpose_input_to_cell_weights.
run();
1116 _transpose_input_to_output_weights.
run();
1117 _transpose_recurrent_to_forget_weights.
run();
1118 _transpose_recurrent_to_cell_weights.
run();
1119 _transpose_recurrent_to_output_weights.
run();
1136 ITensorPack rec_to_input_red_pack = { {
ACL_SRC, _recurrent_to_input_weights }, {
ACL_DST, &_recurrent_to_input_eff_bias } };
1141 _transpose_input_to_input_weights.
run();
1142 _transpose_recurrent_to_input_weights.
run();
1153 ITensorPack input_to_forget_red_pack = { {
ACL_SRC, _input_to_forget_weights }, {
ACL_DST, &_input_to_forget_eff_bias } };
1156 ITensorPack rec_to_forget_red_pack = { {
ACL_SRC, _recurrent_to_forget_weights }, {
ACL_DST, &_recurrent_to_forget_eff_bias } };
1162 ITensorPack rec_to_cell_red_pack = { {
ACL_SRC, _recurrent_to_cell_weights }, {
ACL_DST, &_recurrent_to_cell_eff_bias } };
1165 ITensorPack input_to_output_red_pack = { {
ACL_SRC, _input_to_output_weights }, {
ACL_DST, &_input_to_output_eff_bias } };
1168 ITensorPack rec_to_output_red_pack = { {
ACL_SRC, _recurrent_to_output_weights }, {
ACL_DST, &_recurrent_to_output_eff_bias } };
1176 if(_projection_bias !=
nullptr)
1178 _projection_bias_add.
run();
1183 _transpose_projection_weights.
run();
1186 if(!_projection_tensor_copy_required)
1202 _is_prepared =
true;
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const T * projection_weights() const
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
const T * input_to_input_weights() const
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Quantize using a fixed point multiplication.
quantized, symmetric fixed-point 16-bit number
bool use_layer_norm() const
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
static CLScheduler & get()
Access the scheduler singleton.
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor. ...
Basic function to execute GEMMLowpQuantizeDown kernels on CL.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
QuantizationInfo quantization_info() const override
Get the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs and output.
float output_intermediate_scale() const
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
bool has_cifg_opt() const
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
float cell_intermediate_scale() const
float forget_intermediate_scale() const
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
auto recurrent_to_forget_weights
Store the tensor's metadata.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
T * cell_to_input_weights() const
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
CLQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias)
Initialise the kernel's input and outputs.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
SimpleTensor< float > src
void configure(const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output, const LSTMParams< ICLTensor > &lstm_params)
Initialize function's tensors.
Copyright (c) 2017-2021 Arm Limited.
auto input_to_cell_weights
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
DataType data_type() const override
Data type used for each element of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClGemmL...
void mark_as_unused() const
Marks a tensor as unused.
void prepare() override
Prepare the function for executing.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
const T * recurrent_to_input_weights() const
static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info)
Static function to check if given info will lead to a valid configuration.
int32_t hidden_state_zero() const
auto input_to_output_weights
const T * projection_bias() const
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
Static function to check if given info will lead to a valid configuration of CLQLSTMLayerNormalizatio...
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
T * output_layer_norm_weights() const
float input_intermediate_scale() const
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(ICLTensor *input, ICLTensor *output, Window *dst_window=nullptr)
Initialise the function's source and destination.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
float hidden_state_scale() const
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
size_t total_size() const override
Returns the total size of the tensor in bytes.
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_cell_weights
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
quantized, symmetric fixed-point 8-bit number
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
T * cell_to_forget_weights() const
static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *dst_window=nullptr)
Static function to check if given info will lead to a valid configuration of CLCopy.
Lower and Upper Bounded Rectifier ( $f(x) = \min(a, \max(b, x))$ )
bool has_projection() const
float projection_clip() const
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
T * cell_to_output_weights() const
Memory group resources scope handling class.
Interface for OpenCL tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
const T * input_gate_bias() const
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A...
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
ITensorInfo & set_tensor_shape(const TensorShape &shape) override
Set the shape of an already initialized tensor.
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
T * cell_layer_norm_weights() const
quantized, asymmetric fixed-point 8-bit number signed
im2col_func configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias)
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
~CLQLSTMLayer()
Default destructor.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
Truncates the least significant values that are lost in operations.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
auto input_to_forget_weights
size_t element_size() const override
Element size in bytes calculated as data_size() * num_channels()
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLPixelWiseMultiplicatio...
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of CLQLSTMLayer.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
Basic implementation of the OpenCL tensor interface.