49 Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
const ITensorInfo *mm_input,
const ITensorInfo *mm_weights,
const ITensorInfo *bias,
50 float gemmlowp_scale,
const TensorInfo *mm_res_info,
const TensorInfo *outstage_tensor_info)
68 void CLQLSTMLayer::TensorCopyKernel::configure(ICLTensor &
src, ICLTensor &
dst)
73 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
84 Iterator input_iter{ _src, _window };
85 Iterator output_iter{ _dst, _window };
89 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
91 input_iter, output_iter);
110 for(
auto &norm : _layer_norms)
112 norm = std::make_unique<CLQLSTMLayerNormalizationKernel>();
115 _memory_group =
MemoryGroup(std::move(memory_manager));
120 void CLQLSTMLayer::configure_layer_norm(LayerNormGate g,
const ICLTensor *in)
124 CLTensor *out = &get_layer_norm_output(g);
125 _memory_group.
manage(out);
128 get_layer_norm(g).
configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
131 Status CLQLSTMLayer::validate_layer_norm(
const ITensorInfo &in,
const ITensorInfo &weight,
const ITensorInfo &bias)
142 void CLQLSTMLayer::configure_mm(
const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
143 const ICLTensor *mm_input,
const ICLTensor *mm_weights,
const ICLTensor *bias,
144 CLTensor *mm_res, CLTensor *outstage_res,
float gemmlowp_scale,
145 const TensorInfo &mm_res_info,
const TensorInfo &outstage_tensor_info)
147 _memory_group.
manage(mm_res);
148 _memory_group.
manage(outstage_res);
150 mm_res->allocator()->init(mm_res_info);
151 outstage_res->allocator()->init(outstage_tensor_info);
154 mm.configure(compile_context, mm_input, mm_weights,
nullptr, mm_res);
158 outstage.configure(compile_context, mm_res, bias, outstage_res, gemmlowp_info);
159 mm_res->allocator()->allocate();
172 cell_state_in, output_state_in, cell_state_out, output_state_out, output, lstm_params);
186 cell_state_out, output_state_out, output);
196 cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info(), output->
info(),
199 const int batch_size =
input->info()->dimension(1);
226 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
227 set_layer_norm_bias(lstm_params.
input_gate_bias(), LayerNormGate::Input);
236 const int32_t cell_shift = log2(qcell_state_in.
scale);
239 int16_t quantized_cell_clip = 0;
244 _has_cell_clipping = quantized_cell_clip > 0;
252 _input_to_input_reduction->configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias,
GEMMLowpReductionKernelInfo(num_units,
false, -qinput.
offset,
true));
253 _recurrent_to_input_reduction->configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias,
GEMMLowpReductionKernelInfo(num_units,
false, -qoutput_state_in.
offset,
true));
264 if(_projection_bias !=
nullptr)
284 _transpose_projection_weights.
configure(compile_context, _projection_weights, &_projection_weights_transposed);
297 configure_mm(compile_context, _mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
298 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
299 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
300 mm_out_info, forget_gate_outstage_info);
303 configure_mm(compile_context, _mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
304 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
305 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
306 mm_out_info, forget_gate_outstage_info);
308 _accumulate_input_recurrent_forget.
configure(compile_context, &_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
315 _memory_group.
manage(&_mul_cell_to_forget_res);
318 _memory_group.
manage(&_cell_to_forget_outstage_res);
321 _cell_to_forget_outstage.
configure(compile_context, &_mul_cell_to_forget_res,
nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
323 _accumulate_cell_forget.
configure(compile_context, &_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
328 CLTensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
332 configure_layer_norm(LayerNormGate::Forget, &_recurrent_to_forget_outstage_res);
334 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
341 _memory_group.
manage(&_forget_gate);
349 configure_mm(compile_context, _mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
350 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
351 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
352 mm_out_info, cell_outstage_info);
355 configure_mm(compile_context, _mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
356 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
357 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
358 mm_out_info, cell_outstage_info);
360 _accumulate_input_recurrent_modulation.
configure(compile_context, &_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
364 CLTensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
368 configure_layer_norm(LayerNormGate::Cell, &_recurrent_to_cell_outstage_res);
370 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
374 _memory_group.
manage(&_cell_gate);
382 _memory_group.
manage(&_input_gate);
393 configure_mm(compile_context, _mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
394 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
395 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
396 mm_out_info, input_outstage_info);
399 configure_mm(compile_context, _mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
400 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
401 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
402 mm_out_info, input_outstage_info);
403 _accumulate_input_recurrent_input.
configure(compile_context, &_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
410 _memory_group.
manage(&_mul_cell_to_input_res);
415 _memory_group.
manage(&_cell_to_input_outstage_res);
416 _cell_to_input_outstage.
configure(compile_context, &_mul_cell_to_input_res,
nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
418 _accumulate_cell_input.
configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res,
ConvertPolicy::SATURATE);
422 CLTensor *input_activation_input = &_recurrent_to_input_outstage_res;
426 configure_layer_norm(LayerNormGate::Input, &_recurrent_to_input_outstage_res);
428 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
438 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
440 _memory_group.
manage(&_mul_input_cell_res);
447 if(_has_cell_clipping)
454 configure_mm(compile_context, _mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
455 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
456 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
457 mm_out_info, output_outstage_info);
460 configure_mm(compile_context, _mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
461 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
462 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
463 mm_out_info, output_outstage_info);
465 _accumulate_input_recurrent_output.
configure(compile_context, &_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
474 _memory_group.
manage(&_mul_cell_to_output_res);
480 _memory_group.
manage(&_cell_to_output_outstage_res);
481 _cell_to_output_outstage.
configure(compile_context, &_mul_cell_to_output_res,
nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
484 _accumulate_cell_to_output.
configure(compile_context, &_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
489 CLTensor *output_activation_input = &_recurrent_to_output_outstage_res;
493 configure_layer_norm(LayerNormGate::Output, &_recurrent_to_output_outstage_res);
495 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
499 _memory_group.
manage(&_output_gate);
507 _memory_group.
manage(&_hidden_mul_res);
513 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
518 _projection_tensor_copy_required = (num_units !=
output_size);
519 ICLTensor *hidden_gate_result = output_state_out;
521 _memory_group.
manage(&_hidden_gate);
523 if(_projection_tensor_copy_required)
527 hidden_gate_result = &_hidden_gate;
530 _hidden_outstage.
configure(compile_context, &_hidden_mul_res,
nullptr, hidden_gate_result, gemmlowp_info);
536 const TensorInfo projection_outstage_info(*output_state_out->
info());
544 TensorInfo projection_mm_out_info{ mm_out_info };
547 configure_mm(compile_context, _mm_projection, _projection_outstage, gemmlowp_info,
548 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
549 &_mm_projection_res, &_projection_outstage_res, projection_scale,
550 projection_mm_out_info, projection_outstage_info);
552 ICLTensor *accumulate_destination = output_state_out;
554 if(_projection_tensor_copy_required)
559 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
560 accumulate_destination = &_projection_accumulate_res;
566 if(_projection_tensor_copy_required)
568 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
572 int8_t quantized_projection_clip{ 0 };
575 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.
projection_clip() / qprojection.scale, -128, 127);
578 if(quantized_projection_clip > 0)
581 quantized_projection_clip));
582 _has_projection_clipping =
true;
587 if(_projection_tensor_copy_required)
589 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
595 _copy_output.
configure(compile_context, output_state_out, output);
608 cell_state_out, output_state_out, output);
614 const unsigned int batch_size =
input->dimension(1);
616 const unsigned int output_size = output_state_out->
dimension(_out_state_output_size_dimension_idx);
667 const int32_t cell_shift = log2(qcell_state_in.
scale);
671 int16_t quantized_cell_clip = 0;
739 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
742 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
774 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
777 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &input_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
810 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
813 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
840 if(quantized_cell_clip > 0)
843 quantized_cell_clip)));
849 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info,
input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
852 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
884 const float hidden_state_scale = std::pow(2, -15) / lstm_params.
hidden_state_scale() * std::pow(2, -15);
890 const bool projection_tensor_copy_required = num_units !=
output_size;
906 const TensorInfo projection_outstage_info(*output_state_out);
909 TensorInfo projection_mm_out_info{ mm_out_info };
912 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
913 &projection_outstage_info));
915 if(projection_tensor_copy_required)
922 if(projection_tensor_copy_required)
927 int8_t quantized_projection_clip{ 0 };
933 if(quantized_projection_clip > 0)
936 quantized_projection_clip)));
941 if(projection_tensor_copy_required)
971 _mm_input_to_forget.
run();
972 _input_to_forget_outstage.
run();
974 _mm_recurrent_to_forget.
run();
975 _recurrent_to_forget_outstage.
run();
976 _accumulate_input_recurrent_forget.
run();
980 _pixelwise_mul_cell_to_forget.
run();
981 _cell_to_forget_outstage.
run();
982 _accumulate_cell_forget.
run();
990 _forget_gate_sigmoid.
run();
993 _mm_input_to_cell.
run();
994 _input_to_cell_outstage.
run();
996 _mm_recurrent_to_cell.
run();
997 _recurrent_to_cell_outstage.
run();
998 _accumulate_input_recurrent_modulation.
run();
1005 _cell_gate_tanh.
run();
1010 _input_gate_sub.
run();
1014 _mm_input_to_input.
run();
1015 _input_to_input_outstage.
run();
1016 _mm_recurrent_to_input.
run();
1017 _recurrent_to_input_outstage.
run();
1018 _accumulate_input_recurrent_input.
run();
1022 _pixelwise_mul_cell_to_input.
run();
1023 _cell_to_input_outstage.
run();
1024 _accumulate_cell_input.
run();
1032 _input_gate_sigmoid.
run();
1036 _pixelwise_mul_forget_cell.
run();
1037 _pixelwise_mul_input_cell.
run();
1038 _add_forget_cell.
run();
1039 if(_has_cell_clipping)
1045 _mm_input_to_output.
run();
1046 _input_to_output_outstage.
run();
1047 _mm_recurrent_to_output.
run();
1048 _recurrent_to_output_outstage.
run();
1049 _accumulate_input_recurrent_output.
run();
1052 _pixelwise_mul_cell_to_output.
run();
1053 _cell_to_output_outstage.
run();
1054 _accumulate_cell_to_output.
run();
1062 _output_gate_sigmoid.
run();
1066 _pixelwise_mul_hidden.
run();
1067 _hidden_outstage.
run();
1072 _mm_projection.
run();
1073 _projection_outstage.
run();
1075 if(_projection_tensor_copy_required)
1077 _projection_output_to_accumulate_copy.run();
1080 _accumulate_projection.
run();
1082 if(_projection_tensor_copy_required)
1084 _projection_accumulate_to_output_copy.run();
1087 if(_has_projection_clipping)
1089 _projection_clip.
run();
1094 if(_projection_tensor_copy_required)
1096 _hidden_to_output_copy.run();
1115 _transpose_input_to_forget_weights.
run();
1116 _transpose_input_to_cell_weights.
run();
1117 _transpose_input_to_output_weights.
run();
1118 _transpose_recurrent_to_forget_weights.
run();
1119 _transpose_recurrent_to_cell_weights.
run();
1120 _transpose_recurrent_to_output_weights.
run();
1138 _transpose_input_to_input_weights.
run();
1139 _transpose_recurrent_to_input_weights.
run();
1160 if(_projection_bias !=
nullptr)
1162 _projection_bias_add.
run();
1167 _transpose_projection_weights.
run();
1170 if(!_projection_tensor_copy_required)
1186 _is_prepared =
true;
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const T * projection_weights() const
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
const T * input_to_input_weights() const
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Quantize using a fixed point multiplication.
quantized, symmetric fixed-point 16-bit number
bool use_layer_norm() const
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
static CLScheduler & get()
Access the scheduler singleton.
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
QuantizationInfo quantization_info() const override
Get the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
float output_intermediate_scale() const
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
bool has_cifg_opt() const
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
float cell_intermediate_scale() const
float forget_intermediate_scale() const
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
auto recurrent_to_forget_weights
Store the tensor's metadata.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
T * cell_to_input_weights() const
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
CLQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias)
Initialise the kernel's input and outputs.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
SimpleTensor< float > src
void configure(const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output, const LSTMParams< ICLTensor > &lstm_params)
Initialize function's tensors.
Copyright (c) 2017-2021 Arm Limited.
auto input_to_cell_weights
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
DataType data_type() const override
Data type used for each element of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of CLGEMMLowpQuantizeDownIn...
void mark_as_unused() const
Marks a tensor as unused.
void prepare() override
Prepare the function for executing.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
const T * recurrent_to_input_weights() const
int32_t hidden_state_zero() const
auto input_to_output_weights
const T * projection_bias() const
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
Static function to check if given info will lead to a valid configuration of CLQLSTMLayerNormalizatio...
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
T * output_layer_norm_weights() const
float input_intermediate_scale() const
void run() override final
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(ICLTensor *input, ICLTensor *output, Window *dst_window=nullptr)
Initialise the function's source and destination.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
float hidden_state_scale() const
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
size_t total_size() const override
Returns the total size of the tensor in bytes.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_cell_weights
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
quantized, symmetric fixed-point 8-bit number
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
T * cell_to_forget_weights() const
static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *dst_window=nullptr)
Static function to check if given info will lead to a valid configuration of CLCopy.
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
bool has_projection() const
float projection_clip() const
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
T * cell_to_output_weights() const
Memory group resources scope handling class.
Interface for OpenCL tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info)
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixAReducti...
const T * input_gate_bias() const
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
ITensorInfo & set_tensor_shape(const TensorShape &shape) override
Set the shape of an already initialized tensor.
T * cell_layer_norm_weights() const
quantized, asymmetric fixed-point 8-bit number signed
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8.
~CLQLSTMLayer()
Default destructor.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
Truncates the least significant values that are lost in operations.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
auto input_to_forget_weights
size_t element_size() const override
Element size in bytes calculated as data_size() * num_channels()
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLPixelWiseMultiplicatio...
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of CLQLSTMLayer.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
Basic implementation of the OpenCL tensor interface.