const QuantizationInfo qasymm(1.f / 128.f, 128);
const QuantizationInfo qsymm_3(8.f / 32768.f, 0);
const QuantizationInfo qsymm_4(16.f / 32768.f, 0);
const QuantizationInfo qsymm_0(1.f / 32768.f, 0);
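// Fixed-point reading of the scales above (plain arithmetic, consistent with the
// QASYMM8 / QSYMM16 types this function works on): 1/128 with offset 128 is an
// 8-bit asymmetric mapping of roughly [-1, 1); 8/32768 = 2^-12, 16/32768 = 2^-11
// and 1/32768 = 2^-15 are 16-bit symmetric mappings of roughly [-8, 8),
// [-16, 16) and [-1, 1) respectively.

// Constructor: default-initializes every sub-function and internal tensor used by
// the quantized LSTM cell and takes ownership of the optional memory manager.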
CLLSTMLayerQuantized::CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
      _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add_cell_state_tmps(), _add2(), _mul_forget_gate_cell_state(),
      _mul_input_gate_input_mod_gate(), _mul_output_state_tmp_output_gate(), _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(),
      _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr), _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr),
      _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr), _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr),
      _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(), _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(),
      _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(), _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state_tmp1(), _cell_state_tmp2(),
      _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(), _is_prepared(false)
{
}
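// configure() overload without an explicit CLCompileContext: it forwards its
// arguments to the compile-context overload (excerpt of the forwarded argument
// list below).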
              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
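// Main configure() body: shapes are read from the input's TensorInfo
// (dimension 1 is the batch size) and the weight and bias tensor pointers are
// cached on the function object.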
const int batch_size = input->info()->dimension(1);
// ...
_cell_bias = cell_bias;
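// Weight preparation: the four input-to-gate and the four recurrent-to-gate
// weight matrices are concatenated along the Y axis, the two results are then
// combined into a single weight tensor, and that tensor is transposed so one
// low-precision GEMM can produce all gate pre-activations at once.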
std::vector<const ICLTensor *> inputs_weights_vector;
// ...
std::vector<const ICLTensor *> recurrent_weights_vector;
// ...
_concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY);
// ...
_concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY);
// ...
std::vector<const ICLTensor *> weights_vector;
weights_vector.emplace_back(&_recurrent_weights);
weights_vector.emplace_back(&_input_weights);
// ...
_transpose_weights.configure(compile_context, &_weights, &_weights_transposed);
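// Input preparation: the current input and the previous output state are
// concatenated into _input, and the gate biases are gathered into a single
// _bias tensor that matches the combined weight layout; _gemmlowp then
// multiplies the concatenated input by the transposed weights.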
std::vector<const ICLTensor *> input_vector;
input_vector.emplace_back(input);
input_vector.emplace_back(output_state_in);
// ...
_memory_group.manage(&_input);
// ...
std::vector<const ICLTensor *> bias_vector;
// ...
bias_vector.emplace_back(cell_bias);
_memory_group.manage(&_output_highp);
_gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp);
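// The low-precision GEMM accumulates into 32-bit integers (_output_highp); the
// output stage below requantizes those accumulators to the 16-bit symmetric
// format consumed by the gate activations.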
int output_multiplier = 0;
int output_shift = 0;
// ...
_memory_group.manage(&_output_lowp);
_output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
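// How the (output_multiplier, output_shift) pair is typically obtained: the
// effective float rescale factor is converted to a fixed-point multiplier and
// shift with the library helper calculate_quantized_multiplier(). The scale
// combination below is an illustrative assumption, not copied from this file:
//   const float effective_scale = input_scale * weights_scale / gate_input_scale; // assumed combination
//   quantization::calculate_quantized_multiplier(effective_scale, &output_multiplier, &output_shift);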
_memory_group.manage(&_input_gate_input);
_slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
_memory_group.manage(&_forget_gate_input);
// ...
_memory_group.manage(&_input_modulation_gate_input);
// ...
_memory_group.manage(&_output_gate_input);
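// The requantized GEMM output stacks the four gate pre-activations along its
// first dimension; CLSlice splits it into the per-gate inputs. The block above
// uses 2-D slice coordinates for batch_size > 1, the block below uses 1-D
// coordinates for the batch_size == 1 case.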
_memory_group.manage(&_input_gate_input);
_slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
_memory_group.manage(&_forget_gate_input);
// ...
_memory_group.manage(&_input_modulation_gate_input);
// ...
_memory_group.manage(&_output_gate_input);
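// Gate math implemented by the activation and element-wise functions whose
// outputs are managed below (standard LSTM cell equations; the corresponding
// configure() calls sit between these manage() calls in the full source):
//   forget_gate           = sigmoid(forget_gate_input)
//   input_gate            = sigmoid(input_gate_input)
//   input_modulation_gate = tanh(input_modulation_gate_input)
//   output_gate           = sigmoid(output_gate_input)
//   cell_state_out        = forget_gate * cell_state_in + input_gate * input_modulation_gate
//   output_state_out      = output_gate * tanh(cell_state_out)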
_memory_group.manage(&_forget_gate_output);
_memory_group.manage(&_input_gate_output);
_memory_group.manage(&_input_modulation_gate_output);
_memory_group.manage(&_output_gate_output);
_memory_group.manage(&_cell_state_tmp1);
_memory_group.manage(&_cell_state_tmp2);
_memory_group.manage(&_output_state_tmp);
_memory_group.manage(&_output_state_out_symm);
_memory_group.manage(&_output_state_out_f32);
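// Output conversion: the output state is produced in the 16-bit symmetric
// format, dequantized to F32, and then requantized to the 8-bit asymmetric
// format of output_state_out.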
_dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
// ...
_quantize.configure(compile_context, &_output_state_out_f32, output_state_out);
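// CLLSTMLayerQuantized::validate(): mirrors configure() on ITensorInfo
// descriptors, checking pointers, shapes, data types and quantization info
// without allocating any device memory.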
                                    output_state_in, cell_state_out, output_state_out);
// ...
const int batch_size = input->dimension(1);
std::vector<const ITensorInfo *> inputs_weights_vector;
// ...
std::vector<const ITensorInfo *> recurrent_weights_vector;
// ...
std::vector<const ITensorInfo *> weights_vector;
weights_vector.emplace_back(&recurrent_weights);
weights_vector.emplace_back(&input_weights);
// ...
TensorInfo weights_transposed = weights.clone()->set_is_resizable(true).set_tensor_shape(weights_transposed_shape);
// ...
std::vector<const ITensorInfo *> input_vector;
input_vector.emplace_back(input);
input_vector.emplace_back(output_state_in);
// ...
std::vector<const ITensorInfo *> bias_vector;
// ...
bias_vector.emplace_back(cell_bias);
// ...
int output_multiplier = 0;
int output_shift = 0;
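// CLLSTMLayerQuantized::run(): executes one LSTM time step; the managed
// temporaries are acquired from the memory group for the duration of the call,
// then the configured sub-functions run in order.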
_concat_inputs.run();
// ...
_slice_input_tensor.run();
_slice_forget_tensor.run();
_slice_cell_tensor.run();
_slice_output_tensor.run();
_sigmoid_forget_gate.run();
_sigmoid_input_gate.run();
_tanh_modulation_gate.run();
_sigmoid_output_gate.run();
_mul_forget_gate_cell_state.run();
_mul_input_gate_input_mod_gate.run();
_add_cell_state_tmps.run();
_tanh_output_state.run();
_mul_output_state_tmp_output_gate.run();
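// CLLSTMLayerQuantized::prepare(): one-off work before the first run(); the
// constant weight concatenations and the transpose are executed once, after
// which the original weight tensors can be marked as unused.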
_concat_input_weights.run();
_concat_recurrent_weights.run();
_concat_weights.run();
_transpose_weights.run();
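// A minimal usage sketch, assuming QASYMM8 input/weights/output state, S32
// biases and a QSYMM16 cell state; tensor creation and allocation are omitted
// and all tensor names are illustrative. Only configure() and run() below are
// the documented API of this function:
//
//   CLLSTMLayerQuantized lstm;
//   lstm.configure(&input,
//                  &input_to_input_w, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
//                  &recurrent_to_input_w, &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
//                  &input_gate_bias, &forget_gate_bias, &cell_bias, &output_gate_bias,
//                  &cell_state_in, &output_state_in, &cell_state_out, &output_state_out);
//   lstm.run(); // one time step
//
// For the next step, the produced cell_state_out / output_state_out contents
// serve as the new cell_state_in / output_state_in.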