NELSTMLayerQuantized.cpp (Compute Library 23.11)
/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h"

#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/Validate.h"

#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"

#include <cmath>
#include <memory>
#include <tuple>

namespace arm_compute
{
namespace
{
// Quantization info structures used in the quantized LSTM layer
const QuantizationInfo qasymm(1.f / 128.f, 128);
const QuantizationInfo qsymm_3(8.f / 32768.f, 0);  // qsymm16 with 3 integer bits
const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bits
const QuantizationInfo qsymm_0(1.f / 32768.f, 0);  // qsymm16 with 0 integer bits
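// A QSYMM16 scale of 2^n / 32768 yields a Qn.(15-n) fixed-point format: e.g. qsymm_3
// (scale 2^-12) represents values in [-8, 8) with 12 fractional bits.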
} // namespace

NELSTMLayerQuantized::~NELSTMLayerQuantized() = default;

NELSTMLayerQuantized::NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _gemmlowp(),
      _output_stage(),
      _transpose_weights(),
      _concat_input_weights(),
      _concat_recurrent_weights(),
      _concat_weights(),
      _concat_inputs(),
      _concat_bias(),
      _sigmoid_forget_gate(),
      _sigmoid_input_gate(),
      _sigmoid_output_gate(),
      _tanh_modulation_gate(),
      _tanh_output_state(),
      _add1(),
      _add2(),
      _mul1(),
      _mul2(),
      _mul3(),
      _slice_input_tensor(),
      _slice_forget_tensor(),
      _slice_cell_tensor(),
      _slice_output_tensor(),
      _dequantize(),
      _quantize(),
      _input_to_input_weights(nullptr),
      _input_to_forget_weights(nullptr),
      _input_to_cell_weights(nullptr),
      _input_to_output_weights(nullptr),
      _recurrent_to_input_weights(nullptr),
      _recurrent_to_forget_weights(nullptr),
      _recurrent_to_cell_weights(nullptr),
      _recurrent_to_output_weights(nullptr),
      _input_gate_bias(nullptr),
      _forget_gate_bias(nullptr),
      _cell_bias(nullptr),
      _output_gate_bias(nullptr),
      _recurrent_weights(),
      _input_weights(),
      _weights(),
      _input(),
      _weights_transposed(),
      _output_highp(),
      _output_lowp(),
      _bias(),
      _forget_gate_input(),
      _input_gate_input(),
      _output_gate_input(),
      _input_modulation_gate_input(),
      _forget_gate_output(),
      _input_gate_output(),
      _output_gate_output(),
      _input_modulation_gate_output(),
      _cell_state1(),
      _cell_state2(),
      _output_state_tmp(),
      _output_state_out_symm(),
      _output_state_out_f32(),
      _is_prepared(false)
{
}

void NELSTMLayerQuantized::configure(const ITensor *input,
                                     const ITensor *input_to_input_weights,
                                     const ITensor *input_to_forget_weights,
                                     const ITensor *input_to_cell_weights,
                                     const ITensor *input_to_output_weights,
                                     const ITensor *recurrent_to_input_weights,
                                     const ITensor *recurrent_to_forget_weights,
                                     const ITensor *recurrent_to_cell_weights,
                                     const ITensor *recurrent_to_output_weights,
                                     const ITensor *input_gate_bias,
                                     const ITensor *forget_gate_bias,
                                     const ITensor *cell_bias,
                                     const ITensor *output_gate_bias,
                                     ITensor *cell_state_in,
                                     const ITensor *output_state_in,
                                     ITensor *cell_state_out,
                                     ITensor *output_state_out)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
                                 input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
                                 recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias,
                                 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
                                 cell_state_out, output_state_out);

    ARM_COMPUTE_ERROR_THROW_ON(NELSTMLayerQuantized::validate(
        input->info(), input_to_input_weights->info(), input_to_forget_weights->info(), input_to_cell_weights->info(),
        input_to_output_weights->info(), recurrent_to_input_weights->info(), recurrent_to_forget_weights->info(),
        recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(), input_gate_bias->info(),
        forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(), cell_state_in->info(),
        output_state_in->info(), cell_state_out->info(), output_state_out->info()));

    ARM_COMPUTE_LOG_PARAMS(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
                           input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
                           recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias,
                           cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
                           output_state_out);

    const int input_size  = input->info()->dimension(0);
    const int batch_size  = input->info()->dimension(1);
    const int output_size = input_to_input_weights->info()->dimension(1);

    const QuantizationInfo qweights = input_to_input_weights->info()->quantization_info(); // Weights quantization

    auto_init_if_empty(*cell_state_out->info(),
                       TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_4));
    auto_init_if_empty(*output_state_out->info(),
                       TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QASYMM8, qasymm));

    _input_to_input_weights      = input_to_input_weights;
    _input_to_forget_weights     = input_to_forget_weights;
    _input_to_cell_weights       = input_to_cell_weights;
    _input_to_output_weights     = input_to_output_weights;
    _recurrent_to_input_weights  = recurrent_to_input_weights;
    _recurrent_to_forget_weights = recurrent_to_forget_weights;
    _recurrent_to_cell_weights   = recurrent_to_cell_weights;
    _recurrent_to_output_weights = recurrent_to_output_weights;
    _input_gate_bias             = input_gate_bias;
    _forget_gate_bias            = forget_gate_bias;
    _cell_bias                   = cell_bias;
    _output_gate_bias            = output_gate_bias;

    // Weights concatenation
    std::vector<const ITensor *> inputs_weights_vector{input_to_input_weights, input_to_forget_weights,
                                                       input_to_cell_weights, input_to_output_weights};
    std::vector<const ITensor *> recurrent_weights_vector{recurrent_to_input_weights, recurrent_to_forget_weights,
                                                          recurrent_to_cell_weights, recurrent_to_output_weights};

    _input_weights.allocator()->init(
        TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
    _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY);

    _recurrent_weights.allocator()->init(
        TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
    _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY);

    std::vector<const ITensor *> weights_vector{&_recurrent_weights, &_input_weights};
    _weights.allocator()->init(
        TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
    _concat_weights.configure(weights_vector, &_weights, Window::DimX);
    _transpose_weights.configure(&_weights, &_weights_transposed);

    // Input concatenation
    std::vector<const ITensor *> input_vector{input, output_state_in};
    _memory_group.manage(&_input);
    _input.allocator()->init(
        TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm));
    _concat_inputs.configure(input_vector, &_input, Window::DimX);

    // Bias concatenation
    std::vector<const ITensor *> bias_vector{input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias};
    _bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32));
    _concat_bias.configure(bias_vector, &_bias, Window::DimX);

    // Invert the offset for gemmlowp
    _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
    _weights_transposed.info()->set_quantization_info(
        QuantizationInfo(qweights.uniform().scale, -qweights.uniform().offset));

    // Run gemmlowp
    _memory_group.manage(&_output_highp);
    _output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32));
    _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp);
    _input.allocator()->allocate();

    // Set the offset back
    _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
    _weights_transposed.info()->set_quantization_info(
        QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));

    // multiplier = (input_scale * weights_scale) / output_scale, with output_scale = 2^-12 (qsymm_3)
    _output_lowp.allocator()->init(TensorInfo(_output_highp.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_3));

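    // Requantization detail: the S32 GEMM accumulator carries an effective scale of
    // input_scale * weights_scale, so mapping it onto the 2^-12 gate scale requires a real
    // multiplier of 4096 * input_scale * weights_scale, which calculate_quantized_multiplier()
    // decomposes below into a normalized fixed-point multiplier and a right shift.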
    const float multiplier        = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
    int32_t     output_multiplier = 0;
    int32_t     output_shift      = 0;
    quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

    _memory_group.manage(&_output_lowp);

    GEMMLowpOutputStageInfo info;
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = output_multiplier;
    info.gemmlowp_shift      = output_shift;
    info.output_data_type    = DataType::QSYMM16;
    _output_stage.configure(&_output_highp, &_bias, &_output_lowp, info);
    _output_highp.allocator()->allocate();
    _bias.allocator()->allocate();

    // Get the gate tensors
    if (batch_size > 1)
    {
        _memory_group.manage(&_input_gate_input);
        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, {0, 0}, {output_size, batch_size});
        _memory_group.manage(&_forget_gate_input);
        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, {output_size, 0},
                                       {2 * output_size, batch_size});
        _memory_group.manage(&_input_modulation_gate_input);
        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, {2 * output_size, 0},
                                     {3 * output_size, batch_size});
        _memory_group.manage(&_output_gate_input);
        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, {3 * output_size, 0},
                                       {4 * output_size, batch_size});
        _output_lowp.allocator()->allocate();
    }
    else
    {
        _memory_group.manage(&_input_gate_input);
        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, {0}, {output_size});
        _memory_group.manage(&_forget_gate_input);
        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, {output_size}, {2 * output_size});
        _memory_group.manage(&_input_modulation_gate_input);
        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, {2 * output_size},
                                     {3 * output_size});
        _memory_group.manage(&_output_gate_input);
        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, {3 * output_size}, {4 * output_size});
        _output_lowp.allocator()->allocate();
    }
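    // The output stage result packs the four gates along the x axis as
    // [input | forget | input modulation | output], each slice output_size elements wide,
    // hence the slice coordinates at multiples of output_size above.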

    // Forget gate
    _memory_group.manage(&_forget_gate_output);
    _forget_gate_output.allocator()->init(
        TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output,
                                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    _forget_gate_input.allocator()->allocate();

    // Input gate
    _memory_group.manage(&_input_gate_output);
    _input_gate_output.allocator()->init(
        TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output,
                                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    _input_gate_input.allocator()->allocate();

    // Input modulation gate equation
    _memory_group.manage(&_input_modulation_gate_output);
    _input_modulation_gate_output.allocator()->init(
        TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
    _input_modulation_gate_input.allocator()->allocate();

    // Output gate
    _memory_group.manage(&_output_gate_output);
    _output_gate_output.allocator()->init(
        TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output,
                                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    _output_gate_input.allocator()->allocate();

    // Long term memory
    _memory_group.manage(&_cell_state1);
    _cell_state1.allocator()->init(
        TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
    _mul1.configure(&_forget_gate_output, cell_state_in, &_cell_state1, 1, ConvertPolicy::SATURATE,
                    RoundingPolicy::TO_ZERO);
    _forget_gate_output.allocator()->allocate();

    _memory_group.manage(&_cell_state2);
    _cell_state2.allocator()->init(
        TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
    _mul2.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state2, 1, ConvertPolicy::SATURATE,
                    RoundingPolicy::TO_ZERO);
    _input_modulation_gate_output.allocator()->allocate();
    _input_gate_output.allocator()->allocate();

    _add1.configure(&_cell_state1, &_cell_state2, cell_state_out, ConvertPolicy::SATURATE);
    _cell_state1.allocator()->allocate();
    _cell_state2.allocator()->allocate();

    // Short term memory
    _memory_group.manage(&_output_state_tmp);
    _output_state_tmp.allocator()->init(
        TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _tanh_output_state.configure(cell_state_out, &_output_state_tmp,
                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));

    _memory_group.manage(&_output_state_out_symm);
    _output_state_out_symm.allocator()->init(
        TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
    _mul3.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE,
                    RoundingPolicy::TO_ZERO);
    _output_gate_output.allocator()->allocate();
    _output_state_tmp.allocator()->allocate();

    // Requantize the output state from QSYMM16 to QASYMM8
    _memory_group.manage(&_output_state_out_f32);
    _output_state_out_f32.allocator()->init(
        TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32));
    _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32);
    _output_state_out_symm.allocator()->allocate();

    _quantize.configure(&_output_state_out_f32, output_state_out);
    _output_state_out_f32.allocator()->allocate();
}

Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
                                      const ITensorInfo *input_to_input_weights,
                                      const ITensorInfo *input_to_forget_weights,
                                      const ITensorInfo *input_to_cell_weights,
                                      const ITensorInfo *input_to_output_weights,
                                      const ITensorInfo *recurrent_to_input_weights,
                                      const ITensorInfo *recurrent_to_forget_weights,
                                      const ITensorInfo *recurrent_to_cell_weights,
                                      const ITensorInfo *recurrent_to_output_weights,
                                      const ITensorInfo *input_gate_bias,
                                      const ITensorInfo *forget_gate_bias,
                                      const ITensorInfo *cell_bias,
                                      const ITensorInfo *output_gate_bias,
                                      const ITensorInfo *cell_state_in,
                                      const ITensorInfo *output_state_in,
                                      const ITensorInfo *cell_state_out,
                                      const ITensorInfo *output_state_out)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(
        input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
        recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
        input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
        output_state_out);

    const int input_size  = input->dimension(0);
    const int batch_size  = input->dimension(1);
    const int output_size = input_to_input_weights->dimension(1);

    // Dimensionality checks
    ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_to_input_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_gate_bias->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);

    TensorInfo input_weights_info(input_to_input_weights->clone()
                                      ->set_tensor_shape(TensorShape(input_size, output_size))
                                      .set_data_type(DataType::QASYMM8));
    TensorInfo recurrent_weights_info(input_to_input_weights->clone()
                                          ->set_tensor_shape(TensorShape(output_size, output_size))
                                          .set_data_type(DataType::QASYMM8));
    TensorInfo bias_info(
        input_gate_bias->clone()->set_tensor_shape(TensorShape(output_size)).set_data_type(DataType::S32));
    TensorInfo output_state_info(cell_state_in->clone()
                                     ->set_tensor_shape(TensorShape(output_size, batch_size))
                                     .set_data_type(DataType::QASYMM8)
                                     .set_quantization_info(qasymm));
    TensorInfo cell_state_info(cell_state_in->clone()
                                   ->set_tensor_shape(TensorShape(output_size, batch_size))
                                   .set_data_type(DataType::QSYMM16)
                                   .set_quantization_info(qsymm_4));

    // Shape checks
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input_weights_info, input_to_input_weights,
                                                   input_to_forget_weights, input_to_cell_weights,
                                                   input_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&recurrent_weights_info, recurrent_to_input_weights,
                                                   recurrent_to_forget_weights, recurrent_to_cell_weights,
                                                   recurrent_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&bias_info, input_gate_bias, forget_gate_bias, cell_bias,
                                                   output_gate_bias);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&cell_state_info, cell_state_in);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output_state_info, output_state_in);

    // Data type checks
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input_weights_info, input, input_to_input_weights,
                                                       input_to_forget_weights, input_to_cell_weights,
                                                       input_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&recurrent_weights_info, recurrent_to_input_weights,
                                                       recurrent_to_forget_weights, recurrent_to_cell_weights,
                                                       recurrent_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&bias_info, input_gate_bias, forget_gate_bias, cell_bias,
                                                       output_gate_bias);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&cell_state_info, cell_state_in);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&output_state_info, output_state_in);

    // Quantization checks
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input_to_input_weights, input_to_forget_weights,
                                                              input_to_cell_weights, input_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(recurrent_to_input_weights, recurrent_to_forget_weights,
                                                              recurrent_to_cell_weights, recurrent_to_output_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&cell_state_info, cell_state_in);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&output_state_info, output_state_in);

    // Validate internal functions
    // _concat_input_weights
    std::vector<const ITensorInfo *> inputs_weights_vector;
    inputs_weights_vector.emplace_back(input_to_input_weights);
    inputs_weights_vector.emplace_back(input_to_forget_weights);
    inputs_weights_vector.emplace_back(input_to_cell_weights);
    inputs_weights_vector.emplace_back(input_to_output_weights);
    const QuantizationInfo qweights = input_to_input_weights->quantization_info(); // Weights quantization
    const TensorInfo input_weights(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights);
    ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_weights_vector, &input_weights, Window::DimY));

    // _concat_recurrent_weights
    std::vector<const ITensorInfo *> recurrent_weights_vector;
    recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
    recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
    const TensorInfo recurrent_weights(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEConcatenateLayer::validate(recurrent_weights_vector, &recurrent_weights, Window::DimY));

    // _concat_weights
    std::vector<const ITensorInfo *> weights_vector;
    weights_vector.emplace_back(&recurrent_weights);
    weights_vector.emplace_back(&input_weights);
    const TensorInfo weights(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights);
    ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(weights_vector, &weights, Window::DimX));
    // _transpose_weights
    const TensorShape weights_transposed_shape(weights.tensor_shape()[1], weights.tensor_shape()[0]);
    TensorInfo weights_transposed = weights.clone()->set_is_resizable(true).set_tensor_shape(weights_transposed_shape);
    ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(&weights, &weights_transposed));

    // _concat_inputs
    std::vector<const ITensorInfo *> input_vector;
    input_vector.emplace_back(input);
    input_vector.emplace_back(output_state_in);
    TensorInfo input_concatenated(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm);
    ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(input_vector, &input_concatenated, Window::DimX));

    // _concat_bias
    std::vector<const ITensorInfo *> bias_vector;
    bias_vector.emplace_back(input_gate_bias);
    bias_vector.emplace_back(forget_gate_bias);
    bias_vector.emplace_back(cell_bias);
    bias_vector.emplace_back(output_gate_bias);

    const TensorInfo bias_concatenated(TensorShape(4 * output_size), 1, DataType::S32);
    ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(bias_vector, &bias_concatenated, Window::DimX));

    // Invert the offset for gemmlowp
    input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
    weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, -qweights.uniform().offset));

    // _gemmlowp
    const TensorInfo output_highp(TensorShape(4 * output_size, batch_size), 1, DataType::S32);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEGEMMLowpMatrixMultiplyCore::validate(&input_concatenated, &weights_transposed, nullptr, &output_highp));

    // Set the offset back
    input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
    weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));

    const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3);

    const float multiplier        = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
    int32_t     output_multiplier = 0;
    int32_t     output_shift      = 0;
    ARM_COMPUTE_RETURN_ON_ERROR(
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    // _output_stage
    GEMMLowpOutputStageInfo info;
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = output_multiplier;
    info.gemmlowp_shift      = output_shift;
    info.output_data_type    = DataType::QSYMM16;
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&output_highp, &bias_concatenated, &output_lowp, info));

    TensorInfo input_gate_input;
    TensorInfo forget_gate_input;
    TensorInfo input_modulation_gate_input;
    TensorInfo output_gate_input;

    if (batch_size > 1)
    {
        // _slice_input_tensor
        input_gate_input = TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &input_gate_input, {0, 0}, {output_size, batch_size}));
        // _slice_forget_tensor
        forget_gate_input = TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &forget_gate_input, {output_size, 0}, {2 * output_size, batch_size}));
        // _slice_cell_tensor
        input_modulation_gate_input = TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(NESlice::validate(&output_lowp, &input_modulation_gate_input, {2 * output_size, 0},
                                                      {3 * output_size, batch_size}));
        // _slice_output_tensor
        output_gate_input = TensorInfo(TensorShape(output_size, batch_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &output_gate_input, {3 * output_size, 0}, {4 * output_size, batch_size}));
    }
    else
    {
        // _slice_input_tensor
        input_gate_input = TensorInfo(TensorShape(output_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(NESlice::validate(&output_lowp, &input_gate_input, {0}, {output_size}));
        // _slice_forget_tensor
        forget_gate_input = TensorInfo(TensorShape(output_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &forget_gate_input, {output_size}, {2 * output_size}));
        // _slice_cell_tensor
        input_modulation_gate_input = TensorInfo(TensorShape(output_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &input_modulation_gate_input, {2 * output_size}, {3 * output_size}));
        // _slice_output_tensor
        output_gate_input = TensorInfo(TensorShape(output_size), 1, DataType::QSYMM16, qsymm_3);
        ARM_COMPUTE_RETURN_ON_ERROR(
            NESlice::validate(&output_lowp, &output_gate_input, {3 * output_size}, {4 * output_size}));
    }

    // _sigmoid_forget_gate
    const TensorInfo forget_gate_output(forget_gate_input.tensor_shape(), 1, DataType::QSYMM16, qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEActivationLayer::validate(&forget_gate_input, &forget_gate_output,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
    // _sigmoid_input_gate
    const TensorInfo input_gate_output(input_gate_input.tensor_shape(), 1, DataType::QSYMM16, qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(
        &input_gate_input, &input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
    // _tanh_modulation_gate
    const TensorInfo input_modulation_gate_output(input_modulation_gate_input.tensor_shape(), 1, DataType::QSYMM16,
                                                  qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEActivationLayer::validate(&input_modulation_gate_input, &input_modulation_gate_output,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)));
    // _sigmoid_output_gate
    const TensorInfo output_gate_output(output_gate_input.tensor_shape(), 1, DataType::QSYMM16, qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEActivationLayer::validate(&output_gate_input, &output_gate_output,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));

    // _mul_forget_gate_cell_state
    const TensorInfo cell_state_tmp1(forget_gate_output.tensor_shape(), 1, DataType::QSYMM16, qsymm_4);
    ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(
        &forget_gate_output, cell_state_in, &cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));

    // _mul_input_gate_input_mod_gate
    const TensorInfo cell_state_tmp2(input_gate_output.tensor_shape(), 1, DataType::QSYMM16, qsymm_4);
    ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&input_gate_output, &input_modulation_gate_output,
                                                                    &cell_state_tmp2, 1, ConvertPolicy::SATURATE,
                                                                    RoundingPolicy::TO_ZERO));

    // _add_cell_state_tmps
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEArithmeticAddition::validate(&cell_state_tmp1, &cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE));

    // _tanh_output_state
    const TensorInfo output_state_tmp(cell_state_out->tensor_shape(), 1, DataType::QSYMM16, qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(
        NEActivationLayer::validate(cell_state_out, &output_state_tmp,
                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f)));

    // _mul_output_state_tmp_output_gate
    const TensorInfo output_state_out_symm(output_gate_output.tensor_shape(), 1, DataType::QSYMM16, qsymm_0);
    ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&output_state_tmp, &output_gate_output,
                                                                    &output_state_out_symm, 1, ConvertPolicy::SATURATE,
                                                                    RoundingPolicy::TO_ZERO));

    // _dequantize
    const TensorInfo output_state_out_f32(output_state_out_symm.tensor_shape(), 1, DataType::F32);
    ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&output_state_out_symm, &output_state_out_f32));

    // _quantize
    ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&output_state_out_f32, output_state_out));

    if (cell_state_out->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&cell_state_info, cell_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&cell_state_info, cell_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&cell_state_info, cell_state_out);
    }

    if (output_state_out->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&output_state_info, output_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output_state_info, output_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&output_state_info, output_state_out);
    }

    return Status{};
}

void NELSTMLayerQuantized::run()
{
    prepare();

    // Acquire all the temporaries
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Concatenate the input with the previous output state
    _concat_inputs.run();

    // Run gemmlowp
    _gemmlowp.run();
    _output_stage.run();

    // Slice the results
    _slice_input_tensor.run();
    _slice_forget_tensor.run();
    _slice_cell_tensor.run();
    _slice_output_tensor.run();

    // Gates
    // Forget gate
    _sigmoid_forget_gate.run();

    // Input gate
    _sigmoid_input_gate.run();

    // Input modulation gate
    _tanh_modulation_gate.run();

    // Output gate
    _sigmoid_output_gate.run();

    // Cell state (long term memory)
    _mul1.run();
    _mul2.run();
    _add1.run();

    // Output state (short term memory)
    _tanh_output_state.run();
    _mul3.run();

    // Requantize output state from QSYMM16 to QASYMM8
    _dequantize.run();
    _quantize.run();
}

void NELSTMLayerQuantized::prepare()
{
    if (!_is_prepared)
    {
        _input_weights.allocator()->allocate();
        _concat_input_weights.run();

        _input_to_input_weights->mark_as_unused();
        _input_to_forget_weights->mark_as_unused();
        _input_to_cell_weights->mark_as_unused();
        _input_to_output_weights->mark_as_unused();

        _recurrent_weights.allocator()->allocate();
        _concat_recurrent_weights.run();
        _recurrent_to_input_weights->mark_as_unused();
        _recurrent_to_forget_weights->mark_as_unused();
        _recurrent_to_cell_weights->mark_as_unused();
        _recurrent_to_output_weights->mark_as_unused();

        _weights.allocator()->allocate();
        _concat_weights.run();

        _input_weights.mark_as_unused();
        _input_weights.allocator()->free();
        _recurrent_weights.mark_as_unused();
        _recurrent_weights.allocator()->free();

        _weights_transposed.allocator()->allocate();
        _transpose_weights.run();

        _weights.mark_as_unused();
        _weights.allocator()->free();

        _bias.allocator()->allocate();
        _concat_bias.run();
        _input_gate_bias->mark_as_unused();
        _forget_gate_bias->mark_as_unused();
        _cell_bias->mark_as_unused();
        _output_gate_bias->mark_as_unused();

        _is_prepared = true;
    }
}

} // namespace arm_compute
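Note: the snippet below is not part of the file; it is a minimal usage sketch of the function above. The tensor names, problem sizes and the weights' QuantizationInfo are illustrative assumptions. The argument order follows the configure() signature defined in this file, and the input/output states use the fixed QASYMM8 (1/128, 128) and QSYMM16 (16/32768, 0) formats the implementation expects.

#include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative sizes; any positive values work as long as the shapes are consistent.
    const int input_size = 8, output_size = 16, batch_size = 2;

    const QuantizationInfo qweights(1.f / 16.f, 16);   // example weights quantization
    const QuantizationInfo qasymm(1.f / 128.f, 128);   // fixed input/output-state format
    const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // fixed cell-state format

    auto init = [](Tensor &t, const TensorShape &shape, DataType dt, const QuantizationInfo &qi) {
        t.allocator()->init(TensorInfo(shape, 1, dt, qi));
    };

    // Eight QASYMM8 weight matrices: input weights are input_size x output_size,
    // recurrent weights are output_size x output_size.
    Tensor w_xi, w_xf, w_xc, w_xo, w_hi, w_hf, w_hc, w_ho;
    for (Tensor *w : {&w_xi, &w_xf, &w_xc, &w_xo})
        init(*w, TensorShape(input_size, output_size), DataType::QASYMM8, qweights);
    for (Tensor *w : {&w_hi, &w_hf, &w_hc, &w_ho})
        init(*w, TensorShape(output_size, output_size), DataType::QASYMM8, qweights);

    // Four S32 bias vectors of length output_size.
    Tensor b_i, b_f, b_c, b_o;
    for (Tensor *b : {&b_i, &b_f, &b_c, &b_o})
        init(*b, TensorShape(output_size), DataType::S32, QuantizationInfo());

    // Input, recurrent states and outputs.
    Tensor input, output_state_in, output_state_out, cell_state_in, cell_state_out;
    init(input, TensorShape(input_size, batch_size), DataType::QASYMM8, qasymm);
    init(output_state_in, TensorShape(output_size, batch_size), DataType::QASYMM8, qasymm);
    init(output_state_out, TensorShape(output_size, batch_size), DataType::QASYMM8, qasymm);
    init(cell_state_in, TensorShape(output_size, batch_size), DataType::QSYMM16, qsymm_4);
    init(cell_state_out, TensorShape(output_size, batch_size), DataType::QSYMM16, qsymm_4);

    NELSTMLayerQuantized lstm;
    lstm.configure(&input, &w_xi, &w_xf, &w_xc, &w_xo, &w_hi, &w_hf, &w_hc, &w_ho, &b_i, &b_f, &b_c, &b_o,
                   &cell_state_in, &output_state_in, &cell_state_out, &output_state_out);

    // Allocate backing memory for every tensor, then fill weights, biases,
    // states and the input before each call to run().
    for (Tensor *t : {&w_xi, &w_xf, &w_xc, &w_xo, &w_hi, &w_hf, &w_hc, &w_ho, &b_i, &b_f, &b_c, &b_o, &input,
                      &output_state_in, &output_state_out, &cell_state_in, &cell_state_out})
        t->allocator()->allocate();

    lstm.run(); // executes one LSTM time step
    return 0;
}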