// Quantization info used for the internal tensors
const QuantizationInfo qasymm(1.f / 128.f, 128);   // QASYMM8: scale 1/128, offset 128
const QuantizationInfo qsymm_3(8.f / 32768.f, 0);  // QSYMM16 with 3 integer bits
const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // QSYMM16 with 4 integer bits
const QuantizationInfo qsymm_0(1.f / 32768.f, 0);  // QSYMM16 with 0 integer bits
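// (The scales follow a Qn.m pattern: e.g. qsymm_3 has scale 8/32768 = 2^-12, so a
// signed 16-bit symmetric value covers [-8, 8) with 3 integer and 12 fractional bits.)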
CLLSTMLayerQuantized::CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
      _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add_cell_state_tmps(), _add2(), _mul_forget_gate_cell_state(),
      _mul_input_gate_input_mod_gate(), _mul_output_state_tmp_output_gate(), _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(),
      _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr), _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr),
      _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr), _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr),
      _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(), _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(),
      _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(), _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state_tmp1(), _cell_state_tmp2(),
      _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(), _is_prepared(false)
{
}
void CLLSTMLayerQuantized::configure(const ICLTensor *input, const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights,
                                     const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights,
                                     const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias,
                                     const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, const ICLTensor *output_state_in, ICLTensor *cell_state_out,
                                     ICLTensor *output_state_out)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
              cell_state_out, output_state_out);
}
void CLLSTMLayerQuantized::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights,
                                     const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_input_weights,
                                     const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                                     const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                                     ICLTensor *cell_state_in, const ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                 recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                                 input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);

    ARM_COMPUTE_LOG_PARAMS(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
                           recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
                           cell_state_in, output_state_in, cell_state_out, output_state_out);
    ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayerQuantized::validate(input->info(), input_to_input_weights->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
                                                              recurrent_to_input_weights->info(), recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
                                                              input_gate_bias->info(), forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(), cell_state_in->info(), output_state_in->info(),
                                                              cell_state_out->info(), output_state_out->info()));
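
    // Dataflow configured below: concatenate weights -> transpose -> concatenate(input, output_state_in)
    // -> GEMMLowp -> output stage (S32 -> QSYMM16) -> slice into the four gates -> activations
    // -> cell/output-state arithmetic -> dequantize/quantize back to QASYMM8.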
    // ...
    _cell_bias = cell_bias;
    // Concatenate the four input-to-gate weight matrices, then the four recurrent-to-gate
    // matrices, so that all gate contributions can be computed with a single GEMM
    std::vector<const ICLTensor *> inputs_weights_vector;
    inputs_weights_vector.emplace_back(input_to_input_weights);
    inputs_weights_vector.emplace_back(input_to_forget_weights);
    inputs_weights_vector.emplace_back(input_to_cell_weights);
    inputs_weights_vector.emplace_back(input_to_output_weights);
    std::vector<const ICLTensor *> recurrent_weights_vector;
    recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
    // ...
    _concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY);
    // ...
    _concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY);
    std::vector<const ICLTensor *> weights_vector;
    weights_vector.emplace_back(&_recurrent_weights);
    weights_vector.emplace_back(&_input_weights);
    // ...
    // Transpose the combined weight matrix for the GEMM
    _transpose_weights.configure(compile_context, &_weights, &_weights_transposed);
    // Concatenate the current input with the previous output state
    std::vector<const ICLTensor *> input_vector;
    input_vector.emplace_back(input);
    input_vector.emplace_back(output_state_in);
    _memory_group.manage(&_input);
    // ...
    // Concatenate the gate biases in the same order as the weights
    std::vector<const ICLTensor *> bias_vector;
    bias_vector.emplace_back(input_gate_bias);
    bias_vector.emplace_back(forget_gate_bias);
    bias_vector.emplace_back(cell_bias);
    bias_vector.emplace_back(output_gate_bias);
    // ...
    // Quantized matrix multiplication: accumulate into an S32 tensor
    _memory_group.manage(&_output_highp);
    // ...
    _gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp);
    // ...
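    // The S32 accumulator is requantized to QSYMM16. The effective real scale is
    // input_scale * weights_scale / output_scale; calculate_quantized_multiplier()
    // expresses it as a normalized fixed-point multiplier plus a right shift.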
    int output_multiplier = 0;
    int output_shift      = 0;
    // ...
    _memory_group.manage(&_output_lowp);
    // ...
    GEMMLowpOutputStageInfo info{};
    info.gemmlowp_multiplier = output_multiplier;
    info.gemmlowp_shift      = output_shift;
    // ...
    _output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, info);
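
    // After the output stage, _output_lowp holds the packed gate pre-activations in QSYMM16:
    // width 4 * output_size, laid out as [input | forget | input-modulation | output] gate,
    // which the four CLSlice functions below split apart.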
    // ...
    if(batch_size > 1)
    {
        _memory_group.manage(&_input_gate_input);
        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
        _memory_group.manage(&_forget_gate_input);
        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
        _memory_group.manage(&_input_modulation_gate_input);
        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
        _memory_group.manage(&_output_gate_input);
        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
    }
    else
    {
        _memory_group.manage(&_input_gate_input);
        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
        _memory_group.manage(&_forget_gate_input);
        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
        _memory_group.manage(&_input_modulation_gate_input);
        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
        _memory_group.manage(&_output_gate_input);
        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
    }
    // Forget gate: logistic activation on its slice
    _memory_group.manage(&_forget_gate_output);
    // ...
    // Input gate: logistic activation
    _memory_group.manage(&_input_gate_output);
    // ...
    // Input modulation gate: tanh activation
    _memory_group.manage(&_input_modulation_gate_output);
    // ...
    // Output gate: logistic activation
    _memory_group.manage(&_output_gate_output);
    // ...
    // Cell state: cell_state_out = forget_gate * cell_state_in + input_gate * input_modulation_gate
    _memory_group.manage(&_cell_state_tmp1);
    // ...
    _memory_group.manage(&_cell_state_tmp2);
    // ...
    // Output state: output_state_out = output_gate * tanh(cell_state_out)
    _memory_group.manage(&_output_state_tmp);
    // ...
    _memory_group.manage(&_output_state_out_symm);
    // ...
    // Requantize the output state: QSYMM16 -> F32 -> QASYMM8
    _memory_group.manage(&_output_state_out_f32);
    // ...
    _dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
    // ...
    _quantize.configure(compile_context, &_output_state_out_f32, output_state_out);
}
Status CLLSTMLayerQuantized::validate(const ITensorInfo *input, const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights,
                                      const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights,
                                      const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias,
                                      const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                                      const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
                                        recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
                                        output_state_in, cell_state_out, output_state_out);
    // ...
    const int batch_size = input->dimension(1);
    // ...
    std::vector<const ITensorInfo *> inputs_weights_vector;
    inputs_weights_vector.emplace_back(input_to_input_weights);
    inputs_weights_vector.emplace_back(input_to_forget_weights);
    inputs_weights_vector.emplace_back(input_to_cell_weights);
    inputs_weights_vector.emplace_back(input_to_output_weights);
    // ...
    std::vector<const ITensorInfo *> recurrent_weights_vector;
    recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
    // ...
    std::vector<const ITensorInfo *> weights_vector;
    weights_vector.emplace_back(&recurrent_weights);
    weights_vector.emplace_back(&input_weights);
    // ...
    TensorInfo weights_transposed = weights.clone()->set_is_resizable(true).set_tensor_shape(weights_transposed_shape);
    // ...
    std::vector<const ITensorInfo *> input_vector;
    input_vector.emplace_back(input);
    input_vector.emplace_back(output_state_in);
    // ...
    std::vector<const ITensorInfo *> bias_vector;
    bias_vector.emplace_back(input_gate_bias);
    bias_vector.emplace_back(forget_gate_bias);
    bias_vector.emplace_back(cell_bias);
    bias_vector.emplace_back(output_gate_bias);
    // ...
    int output_multiplier = 0;
    int output_shift      = 0;
    // ...
    GEMMLowpOutputStageInfo info{};
    info.gemmlowp_multiplier = output_multiplier;
    info.gemmlowp_shift      = output_shift;
    // ...
    // (the remaining checks mirror the configure() graph through the CL*::validate() helpers)
    return Status{};
}
void CLLSTMLayerQuantized::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Concatenate the current input with the previous output state
    _concat_inputs.run();

    // Quantized matrix multiplication and S32 -> QSYMM16 output stage
    _gemmlowp.run();
    _output_stage.run();

    // Slice the packed gate pre-activations into the four gate inputs
    _slice_input_tensor.run();
    _slice_forget_tensor.run();
    _slice_cell_tensor.run();
    _slice_output_tensor.run();
    // Gate activations
    _sigmoid_forget_gate.run();
    _sigmoid_input_gate.run();
    _tanh_modulation_gate.run();
    _sigmoid_output_gate.run();

    // cell_state_out = forget_gate * cell_state_in + input_gate * input_modulation_gate
    _mul_forget_gate_cell_state.run();
    _mul_input_gate_input_mod_gate.run();
    _add_cell_state_tmps.run();

    // output_state_out = output_gate * tanh(cell_state_out)
    _tanh_output_state.run();
    _mul_output_state_tmp_output_gate.run();

    // Convert the output state back to QASYMM8
    _dequantize.run();
    _quantize.run();
}
void CLLSTMLayerQuantized::prepare()
{
    if(!_is_prepared)
    {
        // One-off weight preparation: concatenate, combine and transpose the weights
        _concat_input_weights.run();
        // ...
        _concat_recurrent_weights.run();
        // ...
        _concat_weights.run();
        // ...
        _transpose_weights.run();
        // ...
        _is_prepared = true;
    }
}
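// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of this translation unit). It assumes all
// tensors below are CLTensor objects already allocated and initialised with the
// shapes and quantization infos that CLLSTMLayerQuantized::validate() expects;
// every name here is a placeholder.
//
//     CLLSTMLayerQuantized lstm; // optionally pass a shared IMemoryManager
//     lstm.configure(&input,
//                    &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights,
//                    &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights,
//                    &input_gate_bias, &forget_gate_bias, &cell_bias, &output_gate_bias,
//                    &cell_state_in, &output_state_in, &cell_state_out, &output_state_out);
//
//     for(size_t t = 0; t < num_timesteps; ++t)
//     {
//         lstm.run(); // prepare() reshapes the weights once, on the first call
//         // feeding cell_state_out/output_state_out back into cell_state_in/
//         // output_state_in between steps is up to the caller
//     }
// ---------------------------------------------------------------------------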