// File-level quantization constants. Each one is a QuantizationInfo built from
// a float and an integer argument.
// NOTE(review): the two arguments are presumably (scale, offset) — confirm
// against the QuantizationInfo constructor.

// qasymm: constructed from 1/128 and 128.
// NOTE(review): the name suggests asymmetric quantized data — confirm at the use sites.
40 const QuantizationInfo
qasymm(1.f / 128.f, 128);
// qsymm_3: constructed from 8/32768 and 0.
41 const QuantizationInfo
qsymm_3(8.f / 32768.f, 0);
// qsymm_4: constructed from 16/32768 and 0.
42 const QuantizationInfo
qsymm_4(16.f / 32768.f, 0);
// qsymm_0: constructed from 1/32768 and 0.
43 const QuantizationInfo qsymm_0(1.f / 32768.f, 0);
47 : _memory_group(
std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
48 _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add_cell_state_tmps(), _add2(), _mul_forget_gate_cell_state(),
49 _mul_input_gate_input_mod_gate(), _mul_output_state_tmp_output_gate(), _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(),
50 _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr), _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr),
51 _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr), _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr),
52 _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(), _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(),
53 _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(), _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state_tmp1(), _cell_state_tmp2(),
54 _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(), _is_prepared(false)
65 configure(
CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
66 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
78 recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
79 input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
82 input_to_output_weights->
info(),
83 recurrent_to_input_weights->
info(), recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
84 input_gate_bias->
info(), forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(), cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info()));
105 _cell_bias = cell_bias;
109 std::vector<const ICLTensor *> inputs_weights_vector;
110 inputs_weights_vector.emplace_back(input_to_input_weights);
111 inputs_weights_vector.emplace_back(input_to_forget_weights);
112 inputs_weights_vector.emplace_back(input_to_cell_weights);
113 inputs_weights_vector.emplace_back(input_to_output_weights);
115 std::vector<const ICLTensor *> recurrent_weights_vector;
116 recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
117 recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
118 recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
119 recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
122 _concat_input_weights.
configure(compile_context, inputs_weights_vector, &_input_weights,
Window::DimY);
125 _concat_recurrent_weights.
configure(compile_context, recurrent_weights_vector, &_recurrent_weights,
Window::DimY);
127 std::vector<const ICLTensor *> weights_vector;
128 weights_vector.emplace_back(&_recurrent_weights);
129 weights_vector.emplace_back(&_input_weights);
133 _transpose_weights.
configure(compile_context, &_weights, &_weights_transposed);
136 std::vector<const ICLTensor *> input_vector;
137 input_vector.emplace_back(input);
138 input_vector.emplace_back(output_state_in);
140 _memory_group.
manage(&_input);
145 std::vector<const ICLTensor *> bias_vector;
146 bias_vector.emplace_back(input_gate_bias);
147 bias_vector.emplace_back(forget_gate_bias);
148 bias_vector.emplace_back(cell_bias);
149 bias_vector.emplace_back(output_gate_bias);
159 _memory_group.
manage(&_output_highp);
161 _gemmlowp.
configure(compile_context, &_input, &_weights_transposed,
nullptr, &_output_highp);
172 int output_multiplier = 0;
173 int output_shift = 0;
176 _memory_group.
manage(&_output_lowp);
180 info.gemmlowp_multiplier = output_multiplier;
181 info.gemmlowp_shift = output_shift;
183 _output_stage.
configure(compile_context, &_output_highp, &_bias, &_output_lowp,
info);
190 _memory_group.
manage(&_input_gate_input);
191 _slice_input_tensor.
configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, {
output_size, batch_size });
192 _memory_group.
manage(&_forget_gate_input);
194 _memory_group.
manage(&_input_modulation_gate_input);
196 _memory_group.
manage(&_output_gate_input);
202 _memory_group.
manage(&_input_gate_input);
203 _slice_input_tensor.
configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
204 _memory_group.
manage(&_forget_gate_input);
205 _slice_forget_tensor.
configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
206 _memory_group.
manage(&_input_modulation_gate_input);
207 _slice_cell_tensor.
configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
208 _memory_group.
manage(&_output_gate_input);
209 _slice_output_tensor.
configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
214 _memory_group.
manage(&_forget_gate_output);
220 _memory_group.
manage(&_input_gate_output);
226 _memory_group.
manage(&_input_modulation_gate_output);
232 _memory_group.
manage(&_output_gate_output);
238 _memory_group.
manage(&_cell_state_tmp1);
243 _memory_group.
manage(&_cell_state_tmp2);
254 _memory_group.
manage(&_output_state_tmp);
258 _memory_group.
manage(&_output_state_out_symm);
265 _memory_group.
manage(&_output_state_out_f32);
267 _dequantize.
configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
270 _quantize.
configure(compile_context, &_output_state_out_f32, output_state_out);
282 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
283 output_state_in, cell_state_out, output_state_out);
287 const int batch_size = input->
dimension(1);
324 std::vector<const ITensorInfo *> inputs_weights_vector;
325 inputs_weights_vector.emplace_back(input_to_input_weights);
326 inputs_weights_vector.emplace_back(input_to_forget_weights);
327 inputs_weights_vector.emplace_back(input_to_cell_weights);
328 inputs_weights_vector.emplace_back(input_to_output_weights);
334 std::vector<const ITensorInfo *> recurrent_weights_vector;
335 recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
336 recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
337 recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
338 recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
343 std::vector<const ITensorInfo *> weights_vector;
344 weights_vector.emplace_back(&recurrent_weights);
345 weights_vector.emplace_back(&input_weights);
350 TensorInfo weights_transposed = weights.
clone()->set_is_resizable(
true).set_tensor_shape(weights_transposed_shape);
354 std::vector<const ITensorInfo *> input_vector;
355 input_vector.emplace_back(input);
356 input_vector.emplace_back(output_state_in);
361 std::vector<const ITensorInfo *> bias_vector;
362 bias_vector.emplace_back(input_gate_bias);
363 bias_vector.emplace_back(forget_gate_bias);
364 bias_vector.emplace_back(cell_bias);
365 bias_vector.emplace_back(output_gate_bias);
385 int output_multiplier = 0;
386 int output_shift = 0;
392 info.gemmlowp_multiplier = output_multiplier;
393 info.gemmlowp_shift = output_shift;
497 _concat_inputs.
run();
504 _slice_input_tensor.
run();
505 _slice_forget_tensor.
run();
506 _slice_cell_tensor.
run();
507 _slice_output_tensor.
run();
511 _sigmoid_forget_gate.
run();
514 _sigmoid_input_gate.
run();
517 _tanh_modulation_gate.
run();
520 _sigmoid_output_gate.
run();
523 _mul_forget_gate_cell_state.
run();
524 _mul_input_gate_input_mod_gate.
run();
525 _add_cell_state_tmps.
run();
528 _tanh_output_state.
run();
529 _mul_output_state_tmp_output_gate.
run();
541 _concat_input_weights.
run();
549 _concat_recurrent_weights.
run();
556 _concat_weights.
run();
564 _transpose_weights.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLDequantizationLayer.
Quantize using a fixed point multiplication.
quantized, symmetric fixed-point 16-bit number
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
void prepare() override
Prepare the function for executing.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
auto input_to_input_weights
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
QuantizationInfo qweights(1.f/16.f, 16)
1 channel, 1 F32 per channel
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
auto recurrent_to_forget_weights
Store the tensor's metadata.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
void configure(std::vector< const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, const ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out)
Initialize function's tensors.
auto input_to_cell_weights
QuantizationInfo qsymm_3(8.f/32768.f, 0)
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClGemmL...
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
auto input_to_output_weights
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiplyCore.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out)
Static function to check if given info will lead to a valid configuration of CLLSTMLayerQuantized.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto recurrent_to_input_weights
Quantized, asymmetric, fixed-point, unsigned 8-bit number.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of CLSlice.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output)
Set the input and output tensors.
auto recurrent_to_cell_weights
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
void run() override
Run the kernels contained in the function.
static Status validate(const std::vector< const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of CLConcatenateLayer.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Memory group resources scope handling class.
Interface for OpenCL tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
void configure(const ICLTensor *input, ICLTensor *output)
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void free() override
Free allocated OpenCL memory.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
QuantizationInfo qasymm(1.f/128.f, 128)
Store the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
QuantizationInfo qsymm_4(16.f/32768.f, 0)
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
Truncates the least significant values that are lost in operations.
CLLSTMLayerQuantized(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLQuantizationLayer.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLPixelWiseMultiplication.