CLQLSTMLayer.cpp (Arm Compute Library 23.11)
1 /*
2  * Copyright (c) 2020-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 #include "arm_compute/core/Utils.h"
33 
34 #include "src/common/utils/Log.h"
39 
40 namespace arm_compute
41 {
42 using namespace arm_compute::utils::info_helpers;
43 using namespace arm_compute::opencl::kernels;
44 namespace
45 {
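// Helper used by CLQLSTMLayer::validate(): checks one gate's GEMMLowp matrix multiplication
// and the output stage that requantizes its S32 result with the given effective scale.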
46 Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info,
47  const ITensorInfo *mm_input,
48  const ITensorInfo *mm_weights,
49  const ITensorInfo *bias,
50  float gemmlowp_scale,
51  const TensorInfo *mm_res_info,
52  const TensorInfo *outstage_tensor_info)
53 {
54  ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(mm_input, mm_weights, nullptr, mm_res_info));
 55  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(
 56  gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
 57  ARM_COMPUTE_RETURN_ON_ERROR(
 58  CLGEMMLowpOutputStage::validate(mm_res_info, bias, outstage_tensor_info, gemmlowp_info));
59  return Status{};
60 }
61 } // namespace
62 
63 Status CLQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
64 {
65  ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
66  ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().num_dimensions() > max_dimension_supported);
68  ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().y() != src.tensor_shape().y());
69  return Status{};
70 }
71 
72 void CLQLSTMLayer::TensorCopyKernel::configure(ICLTensor &src, ICLTensor &dst)
73 {
75  _src = &src;
76  _dst = &dst;
77  _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
78  _window = calculate_max_window(*_src->info(), Steps());
79 }
80 
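// run(): maps both tensors on the CL queue and copies _row_size bytes per window element.
// Used by the projection path when the hidden size (num_units) differs from the output size.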
 81 void CLQLSTMLayer::TensorCopyKernel::run()
 82 {
83  auto &q = CLScheduler::get().queue();
84 
85  _src->map(q, true);
86  _dst->map(q, true);
87 
88  Iterator input_iter{_src, _window};
89  Iterator output_iter{_dst, _window};
90 
 91  execute_window_loop(
 92  _window, [&](const Coordinates &) { memcpy(output_iter.ptr(), input_iter.ptr(), _row_size); }, input_iter,
 93  output_iter);
94 
95  _src->unmap(q);
96  _dst->unmap(q);
97 }
98 
99 CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
100  : _input_to_input_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
101  _recurrent_to_input_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
102  _input_to_forget_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
103  _recurrent_to_forget_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
104  _input_to_cell_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
105  _recurrent_to_cell_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
106  _input_to_output_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
107  _recurrent_to_output_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
108  _projection_reduction(std::make_unique<ClGemmLowpMatrixAReductionKernel>()),
109  _layer_norms(),
110  _copy_output()
111 {
112  for (auto &norm : _layer_norms)
113  {
114  norm = std::make_unique<CLQLSTMLayerNormalizationKernel>();
115  }
116 
117  _memory_group = MemoryGroup(std::move(memory_manager));
118 }
119 
120 CLQLSTMLayer::~CLQLSTMLayer() = default;
121 
122 void CLQLSTMLayer::configure_layer_norm(LayerNormGate g, const ICLTensor *in)
123 {
124  ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
125 
126  CLTensor *out = &get_layer_norm_output(g);
127  _memory_group.manage(out);
128  out->allocator()->init(*(in->info()));
129 
130  get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
131 }
132 
133 Status CLQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
134 {
135  // Output quantization scale will be different, but ignored here
136  // since it will be configured at configure() stage.
137  const TensorInfo out{in};
138  return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
139 }
140 
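// configure_mm(): sets up one GEMMLowp matrix multiplication into an S32 buffer plus the
// output stage that requantizes it to the gate's intermediate quantized type. The effective
// scale is decomposed into a fixed-point multiplier and shift before configuring the stage.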
141 void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context,
142  CLGEMMLowpMatrixMultiplyCore &mm,
143  CLGEMMLowpOutputStage &outstage,
144  GEMMLowpOutputStageInfo &gemmlowp_info,
145  const ICLTensor *mm_input,
146  const ICLTensor *mm_weights,
147  const ICLTensor *bias,
148  CLTensor *mm_res,
149  CLTensor *outstage_res,
150  float gemmlowp_scale,
151  const TensorInfo &mm_res_info,
152  const TensorInfo &outstage_tensor_info)
153 {
154  _memory_group.manage(mm_res);
155  _memory_group.manage(outstage_res);
156 
157  mm_res->allocator()->init(mm_res_info);
158  outstage_res->allocator()->init(outstage_tensor_info);
159 
160  // Configure matrix-multiplication
161  mm.configure(compile_context, mm_input, mm_weights, nullptr, mm_res);
162 
163  // Configure output stage
164  quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier,
165  &gemmlowp_info.gemmlowp_shift);
166  outstage.configure(compile_context, mm_res, bias, outstage_res, gemmlowp_info);
167  mm_res->allocator()->allocate();
168 }
169 
 170 void CLQLSTMLayer::configure(const ICLTensor *input,
 171  const ICLTensor *input_to_forget_weights,
 172  const ICLTensor *input_to_cell_weights,
 173  const ICLTensor *input_to_output_weights,
 174  const ICLTensor *recurrent_to_forget_weights,
 175  const ICLTensor *recurrent_to_cell_weights,
 176  const ICLTensor *recurrent_to_output_weights,
 177  const ICLTensor *forget_gate_bias,
 178  const ICLTensor *cell_bias,
 179  const ICLTensor *output_gate_bias,
 180  ICLTensor *cell_state_in,
 181  ICLTensor *output_state_in,
 182  ICLTensor *cell_state_out,
 183  ICLTensor *output_state_out,
 184  ICLTensor *output,
 185  const LSTMParams<ICLTensor> &lstm_params)
 186 {
 187  configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights,
 188  input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
 189  recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
 190  output_state_in, cell_state_out, output_state_out, output, lstm_params);
191 }
192 
 193 void CLQLSTMLayer::configure(const CLCompileContext &compile_context,
 194  const ICLTensor *input,
 195  const ICLTensor *input_to_forget_weights,
 196  const ICLTensor *input_to_cell_weights,
 197  const ICLTensor *input_to_output_weights,
 198  const ICLTensor *recurrent_to_forget_weights,
 199  const ICLTensor *recurrent_to_cell_weights,
 200  const ICLTensor *recurrent_to_output_weights,
 201  const ICLTensor *forget_gate_bias,
 202  const ICLTensor *cell_bias,
 203  const ICLTensor *output_gate_bias,
 204  ICLTensor *cell_state_in,
 205  ICLTensor *output_state_in,
 206  ICLTensor *cell_state_out,
 207  ICLTensor *output_state_out,
 208  ICLTensor *output,
 209  const LSTMParams<ICLTensor> &lstm_params)
 210 {
 211  ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
 212  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
 213  forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
 214  cell_state_out, output_state_out, output);
 215 
 216  ARM_COMPUTE_LOG_PARAMS(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
 217  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
 218  forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
 219  cell_state_out, output_state_out, output, lstm_params);
220  // Set lstm parameters
221  LSTMParams<ITensorInfo> lstm_params_info{};
222  build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
223 
 224  // Validate
 225  ARM_COMPUTE_ERROR_THROW_ON(CLQLSTMLayer::validate(
 226  input->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
 227  recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
 228  forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(), cell_state_in->info(),
 229  output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(), lstm_params_info));
230 
231  const int batch_size = input->info()->dimension(1);
232  const int num_units = input_to_output_weights->info()->dimension(1);
233  const int output_size = output_state_out->info()->dimension(_out_state_output_size_dimension_idx);
234 
235  const UniformQuantizationInfo qinput = input->info()->quantization_info().uniform();
236  const UniformQuantizationInfo qcell_state_in = cell_state_in->info()->quantization_info().uniform();
237  const UniformQuantizationInfo qoutput_state_in = output_state_in->info()->quantization_info().uniform();
238 
239  _projection_bias = lstm_params.projection_bias();
240  _input_to_forget_weights = input_to_forget_weights;
241  _input_to_cell_weights = input_to_cell_weights;
242  _input_to_output_weights = input_to_output_weights;
243  _recurrent_to_forget_weights = recurrent_to_forget_weights;
244  _recurrent_to_cell_weights = recurrent_to_cell_weights;
245  _recurrent_to_output_weights = recurrent_to_output_weights;
246  _projection_weights = lstm_params.projection_weights();
247 
248  // Layer normalization
249  _has_layer_norm = lstm_params.use_layer_norm();
250  if (_has_layer_norm)
251  {
252  set_layer_norm_weight(lstm_params.forget_layer_norm_weights(), LayerNormGate::Forget);
253  set_layer_norm_weight(lstm_params.cell_layer_norm_weights(), LayerNormGate::Cell);
254  set_layer_norm_weight(lstm_params.input_layer_norm_weights(), LayerNormGate::Input);
255  set_layer_norm_weight(lstm_params.output_layer_norm_weights(), LayerNormGate::Output);
256 
257  set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
258  set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
259  set_layer_norm_bias(lstm_params.input_gate_bias(), LayerNormGate::Input);
260  set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
261  }
262 
263  _has_cifg = lstm_params.has_cifg_opt();
264  _has_projection = lstm_params.has_projection();
265  _has_peephole = lstm_params.has_peephole_opt();
266 
267  // Calculate and decompose effective scales for optimizing matmul calculation
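// The QLSTM quantization scheme expects the cell state to use a power-of-two scale (see the
// cell_shift check in validate()), so log2 of that scale yields an exact shift.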
268  const int32_t cell_shift = log2(qcell_state_in.scale);
269 
270  // Calculate quantized parameters for clipping.
271  int16_t quantized_cell_clip = 0;
272  if (lstm_params.cell_clip() > 0.0f)
273  {
274  quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
275  }
276  _has_cell_clipping = quantized_cell_clip > 0;
277 
278  // Precompute effective bias for optimizing the matmul computations.
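// Each reduction kernel sums the weight rows scaled by the negated zero point of the
// corresponding operand (input or output state). Folding these sums into the bias lets the
// GEMMs below run without per-element offset handling.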
279  if (!_has_cifg)
280  {
281  _input_to_input_weights = lstm_params.input_to_input_weights();
282  _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
283 
284  _input_to_input_reduction->configure(compile_context, _input_to_input_weights->info(),
285  _input_to_input_eff_bias.info(),
286  GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
287  _recurrent_to_input_reduction->configure(
288  compile_context, _recurrent_to_input_weights->info(), _recurrent_to_input_eff_bias.info(),
289  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
290  }
291  _input_to_forget_reduction->configure(compile_context, input_to_forget_weights->info(),
292  _input_to_forget_eff_bias.info(),
293  GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
294  _recurrent_to_forget_reduction->configure(
295  compile_context, recurrent_to_forget_weights->info(), _recurrent_to_forget_eff_bias.info(),
296  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
297  _input_to_cell_reduction->configure(compile_context, input_to_cell_weights->info(), _input_to_cell_eff_bias.info(),
298  GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
299  _recurrent_to_cell_reduction->configure(
300  compile_context, recurrent_to_cell_weights->info(), _recurrent_to_cell_eff_bias.info(),
301  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
302  _input_to_output_reduction->configure(compile_context, input_to_output_weights->info(),
303  _input_to_output_eff_bias.info(),
304  GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
305  _recurrent_to_output_reduction->configure(
306  compile_context, recurrent_to_output_weights->info(), _recurrent_to_output_eff_bias.info(),
307  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
308  if (_has_projection)
309  {
310  _projection_reduction->configure(
311  compile_context, _projection_weights->info(), _projection_eff_bias.info(),
312  GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
313  if (_projection_bias != nullptr)
314  {
315  _projection_bias_add.configure(compile_context, _projection_bias, &_projection_eff_bias,
316  &_projection_eff_bias, ConvertPolicy::SATURATE);
317  }
318  }
319 
320  // Pre-transpose weights to be used in GEMM.
321  _transpose_input_to_forget_weights.configure(compile_context, input_to_forget_weights,
322  &_input_to_forget_weights_transposed);
323  _transpose_input_to_cell_weights.configure(compile_context, input_to_cell_weights,
324  &_input_to_cell_weights_transposed);
325  _transpose_input_to_output_weights.configure(compile_context, input_to_output_weights,
326  &_input_to_output_weights_transposed);
327  _transpose_recurrent_to_forget_weights.configure(compile_context, recurrent_to_forget_weights,
328  &_recurrent_to_forget_weights_transposed);
329  _transpose_recurrent_to_cell_weights.configure(compile_context, recurrent_to_cell_weights,
330  &_recurrent_to_cell_weights_transposed);
331  _transpose_recurrent_to_output_weights.configure(compile_context, recurrent_to_output_weights,
332  &_recurrent_to_output_weights_transposed);
333  if (!_has_cifg)
334  {
335  _transpose_input_to_input_weights.configure(compile_context, lstm_params.input_to_input_weights(),
336  &_input_to_input_weights_transposed);
337  _transpose_recurrent_to_input_weights.configure(compile_context, lstm_params.recurrent_to_input_weights(),
338  &_recurrent_to_input_weights_transposed);
339  }
340  if (_has_projection)
341  {
342  _transpose_projection_weights.configure(compile_context, _projection_weights, &_projection_weights_transposed);
343  }
344 
 345  GEMMLowpOutputStageInfo gemmlowp_info;
 346  gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
 347  gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
 348  gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
 349  gemmlowp_info.output_data_type = DataType::QSYMM16;
350 
351  const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
352  // Forget gate.
353  const TensorInfo forget_gate_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16,
354  QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
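// For every gate GEMM the effective scale is (weight scale * activation scale) / intermediate scale;
// configure_mm() turns it into a fixed-point multiplier and shift for the output stage.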
355  const float input_to_forget_scale = input_to_forget_weights->info()->quantization_info().uniform().scale *
356  qinput.scale / lstm_params.forget_intermediate_scale();
357  configure_mm(compile_context, _mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info, input,
358  &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias, &_mm_input_to_forget_res,
359  &_input_to_forget_outstage_res, input_to_forget_scale, mm_out_info, forget_gate_outstage_info);
360 
361  const float recurrent_to_forget_scale = recurrent_to_forget_weights->info()->quantization_info().uniform().scale *
362  qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
363  configure_mm(compile_context, _mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
364  output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
365  &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
366  mm_out_info, forget_gate_outstage_info);
367 
368  _accumulate_input_recurrent_forget.configure(compile_context, &_input_to_forget_outstage_res,
 369  &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
 370  ConvertPolicy::SATURATE);
371  _input_to_forget_outstage_res.allocator()->allocate();
372 
373  if (_has_peephole)
374  {
375  _mul_cell_to_forget_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
376  _memory_group.manage(&_mul_cell_to_forget_res);
377  _pixelwise_mul_cell_to_forget.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(),
 378  &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE,
 379  RoundingPolicy::TO_ZERO);
380  _cell_to_forget_outstage_res.allocator()->init(
381  TensorInfo(_mul_cell_to_forget_res.info()->tensor_shape(), 1, DataType::QSYMM16,
382  QuantizationInfo(lstm_params.forget_intermediate_scale(), 0)));
383  _memory_group.manage(&_cell_to_forget_outstage_res);
384  const float cell_to_forget_scale =
385  std::pow(2, cell_shift) *
386  lstm_params.cell_to_forget_weights()->info()->quantization_info().uniform().scale /
387  lstm_params.forget_intermediate_scale();
388  quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier,
389  &gemmlowp_info.gemmlowp_shift);
390  _cell_to_forget_outstage.configure(compile_context, &_mul_cell_to_forget_res, nullptr,
391  &_cell_to_forget_outstage_res, gemmlowp_info);
392  _mul_cell_to_forget_res.allocator()->allocate();
393  _accumulate_cell_forget.configure(compile_context, &_recurrent_to_forget_outstage_res,
 394  &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
 395  ConvertPolicy::SATURATE);
396  _cell_to_forget_outstage_res.allocator()->allocate();
397  }
398 
399  CLTensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
400 
401  if (_has_layer_norm)
402  {
403  configure_layer_norm(LayerNormGate::Forget, &_recurrent_to_forget_outstage_res);
404  _recurrent_to_forget_outstage_res.allocator()->allocate();
405  forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
406  }
407 
408  // Output quantization info of Sigmoid and Tanh activations
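// Scale 1/32768 (Q0.15) maps the [-1, 1) output range of sigmoid and tanh onto QSYMM16.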
409  const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
410 
411  const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
412  _memory_group.manage(&_forget_gate);
413  _forget_gate.allocator()->init(forget_gate_info);
414  _forget_gate_sigmoid.configure(compile_context, forget_activation_input, &_forget_gate,
415  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
416  forget_activation_input->allocator()->allocate();
417 
418  // Modulation gate.
419  const TensorInfo cell_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16,
420  QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
421  const float input_to_cell_scale = input_to_cell_weights->info()->quantization_info().uniform().scale *
422  qinput.scale / lstm_params.cell_intermediate_scale();
423  configure_mm(compile_context, _mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info, input,
424  &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias, &_mm_input_to_cell_res,
425  &_input_to_cell_outstage_res, input_to_cell_scale, mm_out_info, cell_outstage_info);
426 
427  const float recurrent_to_cell_scale = recurrent_to_cell_weights->info()->quantization_info().uniform().scale *
428  qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
429  configure_mm(compile_context, _mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info, output_state_in,
430  &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias, &_mm_recurrent_to_cell_res,
431  &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale, mm_out_info, cell_outstage_info);
432 
433  _accumulate_input_recurrent_modulation.configure(compile_context, &_input_to_cell_outstage_res,
 434  &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
 435  ConvertPolicy::SATURATE);
436  _input_to_cell_outstage_res.allocator()->allocate();
437 
438  CLTensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
439 
440  if (_has_layer_norm)
441  {
442  configure_layer_norm(LayerNormGate::Cell, &_recurrent_to_cell_outstage_res);
443  _recurrent_to_cell_outstage_res.allocator()->allocate();
444  cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
445  }
446 
447  const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
448  _memory_group.manage(&_cell_gate);
449  _cell_gate.allocator()->init(cell_gate_info);
450  _cell_gate_tanh.configure(compile_context, cell_activation_input, &_cell_gate,
451  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
452  cell_activation_input->allocator()->allocate();
453 
454  // Input gate.
455  const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
456  _input_gate.allocator()->init(input_gate_info);
457  _memory_group.manage(&_input_gate);
458  if (_has_cifg)
459  {
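// With CIFG the input gate is not computed explicitly: it is derived as (1 - forget gate),
// implemented as a saturated subtraction from a tensor of ones (filled with 32767 in prepare()).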
460  _ones.allocator()->init(*_forget_gate.info());
461  _input_gate_sub.configure(compile_context, &_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
462  _ones.allocator()->allocate();
463  }
464  else
465  {
466  const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
467  QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
468  const float input_to_input_scale = _input_to_input_weights->info()->quantization_info().uniform().scale *
469  qinput.scale / lstm_params.input_intermediate_scale();
470  configure_mm(compile_context, _mm_input_to_input, _input_to_input_outstage, gemmlowp_info, input,
471  &_input_to_input_weights_transposed, &_input_to_input_eff_bias, &_mm_input_to_input_res,
472  &_input_to_input_outstage_res, input_to_input_scale, mm_out_info, input_outstage_info);
473 
474  const float recurrent_to_input_scale =
475  _recurrent_to_input_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale /
476  lstm_params.input_intermediate_scale();
477  configure_mm(compile_context, _mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
478  output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
479  &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
480  mm_out_info, input_outstage_info);
481  _accumulate_input_recurrent_input.configure(compile_context, &_input_to_input_outstage_res,
482  &_recurrent_to_input_outstage_res,
483  &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
484  _input_to_input_outstage_res.allocator()->allocate();
485 
486  if (_has_peephole)
487  {
488  _mul_cell_to_input_res.allocator()->init(
489  TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
490  _memory_group.manage(&_mul_cell_to_input_res);
491  _pixelwise_mul_cell_to_input.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(),
 492  &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE,
 493  RoundingPolicy::TO_ZERO);
494  const float cell_to_input_scale =
495  std::pow(2, cell_shift) *
496  lstm_params.cell_to_input_weights()->info()->quantization_info().uniform().scale /
497  lstm_params.input_intermediate_scale();
498  quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier,
499  &gemmlowp_info.gemmlowp_shift);
500  _cell_to_input_outstage_res.allocator()->init(
501  TensorInfo(_mul_cell_to_input_res.info()->tensor_shape(), 1, DataType::QSYMM16,
502  QuantizationInfo(lstm_params.input_intermediate_scale(), 0)));
503  _memory_group.manage(&_cell_to_input_outstage_res);
504  _cell_to_input_outstage.configure(compile_context, &_mul_cell_to_input_res, nullptr,
505  &_cell_to_input_outstage_res, gemmlowp_info);
506  _mul_cell_to_input_res.allocator()->allocate();
507  _accumulate_cell_input.configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res,
508  &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
509  _cell_to_input_outstage_res.allocator()->allocate();
510  }
511 
512  CLTensor *input_activation_input = &_recurrent_to_input_outstage_res;
513 
514  if (_has_layer_norm)
515  {
516  configure_layer_norm(LayerNormGate::Input, &_recurrent_to_input_outstage_res);
517  _recurrent_to_input_outstage_res.allocator()->allocate();
518  input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
519  }
520 
521  _input_gate_sigmoid.configure(compile_context, input_activation_input, &_input_gate,
522  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
523  input_activation_input->allocator()->allocate();
524  }
525  // Cell.
526  // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
 527  _pixelwise_mul_forget_cell.configure(compile_context, &_forget_gate, cell_state_in, &_forget_gate, 1.f,
 528  ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
529  const float cell_gate_scale = _cell_gate.info()->quantization_info().uniform().scale;
530  const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
531  const TensorInfo mul_input_cell_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
532  QuantizationInfo(mul_input_cell_scale, 0));
533  _memory_group.manage(&_mul_input_cell_res);
534  _mul_input_cell_res.allocator()->init(mul_input_cell_info);
 535  _pixelwise_mul_input_cell.configure(compile_context, &_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f,
 536  ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
537  _cell_gate.allocator()->allocate();
 538  _add_forget_cell.configure(compile_context, &_forget_gate, &_mul_input_cell_res, cell_state_out,
 539  ConvertPolicy::SATURATE);
540  _mul_input_cell_res.allocator()->allocate();
541  _forget_gate.allocator()->allocate();
542  if (_has_cell_clipping)
543  {
544  _cell_clip.configure(compile_context, cell_state_out, nullptr,
545  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
546  -quantized_cell_clip, quantized_cell_clip));
547  }
548  // Output gate.
549  const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
550  QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
551  const float input_to_output_scale = input_to_output_weights->info()->quantization_info().uniform().scale *
552  qinput.scale / lstm_params.output_intermediate_scale();
553  configure_mm(compile_context, _mm_input_to_output, _input_to_output_outstage, gemmlowp_info, input,
554  &_input_to_output_weights_transposed, &_input_to_output_eff_bias, &_mm_input_to_output_res,
555  &_input_to_output_outstage_res, input_to_output_scale, mm_out_info, output_outstage_info);
556 
557  const float recurrent_to_output_scale = recurrent_to_output_weights->info()->quantization_info().uniform().scale *
558  qoutput_state_in.scale / lstm_params.output_intermediate_scale();
559  configure_mm(compile_context, _mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
560  output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
561  &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
562  mm_out_info, output_outstage_info);
563 
564  _accumulate_input_recurrent_output.configure(compile_context, &_recurrent_to_output_outstage_res,
 565  &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
 566  ConvertPolicy::SATURATE);
567  _input_to_output_outstage_res.allocator()->allocate();
568 
569  if (_has_peephole)
570  {
571  // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
572  // Here we are not using the output stage because all operations are done in float
573  _mul_cell_to_output_res.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::S32));
574  _memory_group.manage(&_mul_cell_to_output_res);
575  _pixelwise_mul_cell_to_output.configure(compile_context, cell_state_out, lstm_params.cell_to_output_weights(),
 576  &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE,
 577  RoundingPolicy::TO_ZERO);
578 
579  const float cell_to_output_scale =
580  std::pow(2, cell_shift) *
581  lstm_params.cell_to_output_weights()->info()->quantization_info().uniform().scale /
582  lstm_params.output_intermediate_scale();
583  quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier,
584  &gemmlowp_info.gemmlowp_shift);
585  _cell_to_output_outstage_res.allocator()->init(
586  TensorInfo(_mul_cell_to_output_res.info()->tensor_shape(), 1, DataType::QSYMM16,
587  QuantizationInfo(lstm_params.output_intermediate_scale(), 0)));
588  _memory_group.manage(&_cell_to_output_outstage_res);
589  _cell_to_output_outstage.configure(compile_context, &_mul_cell_to_output_res, nullptr,
590  &_cell_to_output_outstage_res, gemmlowp_info);
591  _mul_cell_to_output_res.allocator()->allocate();
592 
593  _accumulate_cell_to_output.configure(compile_context, &_recurrent_to_output_outstage_res,
 594  &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
 595  ConvertPolicy::SATURATE);
596  _cell_to_output_outstage_res.allocator()->allocate();
597  }
598 
599  CLTensor *output_activation_input = &_recurrent_to_output_outstage_res;
600 
601  if (_has_layer_norm)
602  {
603  configure_layer_norm(LayerNormGate::Output, &_recurrent_to_output_outstage_res);
604  _recurrent_to_output_outstage_res.allocator()->allocate();
605  output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
606  }
607 
608  const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
609  _memory_group.manage(&_output_gate);
610  _output_gate.allocator()->init(output_gate_info);
611  _output_gate_sigmoid.configure(compile_context, output_activation_input, &_output_gate,
612  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
613  output_activation_input->allocator()->allocate();
614 
615  // Hidden.
616  _hidden_tanh.configure(compile_context, cell_state_out, &_input_gate,
617  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
618  // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
619  _memory_group.manage(&_hidden_mul_res);
620  const TensorInfo hidden_mul_res(_input_gate.info()->tensor_shape(), 1, DataType::S32);
621  _hidden_mul_res.allocator()->init(hidden_mul_res);
 622  _pixelwise_mul_hidden.configure(compile_context, &_output_gate, &_input_gate, &_hidden_mul_res, 1.f,
 623  ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
624  _output_gate.allocator()->allocate();
625  _input_gate.allocator()->allocate();
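// Both operands of the multiplication are Q0.15 (scale 2^-15), so the product carries scale
// 2^-30; the factor below rescales it to the hidden state's quantization scale.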
626  const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
627  quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier,
628  &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
629  gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
630  gemmlowp_info.output_data_type = output_state_in->info()->data_type();
631 
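// When the projection changes the feature dimension (num_units != output_size) the hidden gate
// is staged in a temporary tensor and copied row by row with TensorCopyKernel.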
632  _projection_tensor_copy_required = (num_units != output_size);
633  ICLTensor *hidden_gate_result = output_state_out;
634 
635  _memory_group.manage(&_hidden_gate);
636 
637  if (_projection_tensor_copy_required)
638  {
639  _hidden_gate.allocator()->init(*output_state_out->info());
640  _hidden_gate.info()->set_tensor_shape(_hidden_mul_res.info()->tensor_shape());
641  hidden_gate_result = &_hidden_gate;
642  }
643 
644  _hidden_outstage.configure(compile_context, &_hidden_mul_res, nullptr, hidden_gate_result, gemmlowp_info);
645  _hidden_mul_res.allocator()->allocate();
646 
647  // Projection.
648  if (_has_projection)
649  {
650  const TensorInfo projection_outstage_info(*output_state_out->info());
651  const UniformQuantizationInfo qprojection = _projection_weights->info()->quantization_info().uniform();
652  const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
 653  gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
 654  gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
 655  gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
 656  gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
657 
658  TensorInfo projection_mm_out_info{mm_out_info};
659  projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
660 
661  configure_mm(compile_context, _mm_projection, _projection_outstage, gemmlowp_info, hidden_gate_result,
662  &_projection_weights_transposed, &_projection_eff_bias, &_mm_projection_res,
663  &_projection_outstage_res, projection_scale, projection_mm_out_info, projection_outstage_info);
664 
665  ICLTensor *accumulate_destination = output_state_out;
666 
667  if (_projection_tensor_copy_required)
668  {
669  _hidden_gate.allocator()->allocate();
670  _projection_accumulate_res.allocator()->init(*output_state_in->info());
671  _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
672  _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
673  accumulate_destination = &_projection_accumulate_res;
674  }
675 
676  _accumulate_projection.configure(compile_context, &_projection_outstage_res, accumulate_destination,
677  accumulate_destination, ConvertPolicy::SATURATE);
678  _projection_outstage_res.allocator()->allocate();
679 
680  if (_projection_tensor_copy_required)
681  {
682  _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
683  _projection_accumulate_res.allocator()->allocate();
684  }
685 
686  int8_t quantized_projection_clip{0};
687  if (lstm_params.projection_clip() > 0.0f)
688  {
689  quantized_projection_clip =
690  utility::clamp<int8_t>(lstm_params.projection_clip() / qprojection.scale, -128, 127);
691  }
692 
693  if (quantized_projection_clip > 0)
694  {
695  _projection_clip.configure(compile_context, output_state_out, nullptr,
696  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
697  -quantized_projection_clip, quantized_projection_clip));
698  _has_projection_clipping = true;
699  }
700  }
701  else
702  {
703  if (_projection_tensor_copy_required)
704  {
705  _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
706  _hidden_gate.allocator()->allocate();
707  }
708  }
709 
710  // Copy output_state_out to output
711  _copy_output.configure(compile_context, output_state_out, output);
712 }
713 
 714 Status CLQLSTMLayer::validate(const ITensorInfo *input,
 715  const ITensorInfo *input_to_forget_weights,
 716  const ITensorInfo *input_to_cell_weights,
 717  const ITensorInfo *input_to_output_weights,
 718  const ITensorInfo *recurrent_to_forget_weights,
 719  const ITensorInfo *recurrent_to_cell_weights,
 720  const ITensorInfo *recurrent_to_output_weights,
 721  const ITensorInfo *forget_gate_bias,
 722  const ITensorInfo *cell_bias,
 723  const ITensorInfo *output_gate_bias,
 724  const ITensorInfo *cell_state_in,
 725  const ITensorInfo *output_state_in,
 726  const ITensorInfo *cell_state_out,
 727  const ITensorInfo *output_state_out,
 728  const ITensorInfo *output,
 729  const LSTMParams<ITensorInfo> &lstm_params)
 730 {
 731  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
 732  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
 733  forget_gate_bias, cell_bias, output_gate_bias,
 734  cell_state_in, output_state_in, cell_state_out, output_state_out, output);
735 
737  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != 2, "Input must have exactly 2 dimensions");
738 
739  const unsigned int input_size = input->dimension(0);
740  const unsigned int batch_size = input->dimension(1);
741  const unsigned int num_units = input_to_output_weights->dimension(1);
742  const unsigned int output_size = output_state_out->dimension(_out_state_output_size_dimension_idx);
743 
749  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->dimension(1) != num_units);
756 
757  ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() != 1);
758  ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->dimension(0) != num_units);
762 
763  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() != 2);
764  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(0) != num_units);
765  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(1) != batch_size);
767 
768  ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() != 2);
769  ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(0) != output_size);
770  ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(1) != batch_size);
772 
773  // Check whether peephole weights are all there or none
774  if (lstm_params.has_peephole_opt())
775  {
779  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() != 1);
780  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->dimension(0) != num_units);
782  lstm_params.cell_to_output_weights());
784  lstm_params.cell_to_output_weights());
785 
786  if (!lstm_params.has_cifg_opt())
787  {
790  lstm_params.cell_to_input_weights());
792  lstm_params.cell_to_input_weights());
793  }
794  }
795 
796  const UniformQuantizationInfo qinput = input->quantization_info().uniform();
797  const UniformQuantizationInfo qcell_state_in = cell_state_in->quantization_info().uniform();
798  const UniformQuantizationInfo qoutput_state_in = output_state_in->quantization_info().uniform();
799 
800  // Calculate and decompose effective scales for optimizing matmul calculation
801  const int32_t cell_shift = log2(qcell_state_in.scale);
802  ARM_COMPUTE_RETURN_ERROR_ON(cell_shift > -9);
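// i.e. the cell state scale must be a power of two no larger than 2^-9.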
803 
804  // Calculate quantized parameters for clipping.
805  int16_t quantized_cell_clip = 0;
806  if (lstm_params.cell_clip() > 0.0f)
807  {
808  quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
809  }
810 
811  // Precompute effective bias for optimizing the matmul computations.
812  const TensorInfo eff_bias_info(TensorShape(num_units), 1, DataType::S32);
813  const TensorInfo projection_eff_bias_info(TensorShape(output_size), 1, DataType::S32);
814  if (!lstm_params.has_cifg_opt())
815  {
 816  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 817  lstm_params.input_to_input_weights(), &eff_bias_info,
818  GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
 819  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 820  lstm_params.recurrent_to_input_weights(), &eff_bias_info,
821  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
822  }
 823  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 824  input_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
 825  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 826  recurrent_to_forget_weights, &eff_bias_info,
827  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
 828  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 829  input_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
 830  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 831  recurrent_to_cell_weights, &eff_bias_info,
832  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
 833  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 834  input_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
 835  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 836  recurrent_to_output_weights, &eff_bias_info,
837  GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
838  if (lstm_params.has_projection())
839  {
 840  ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(
 841  lstm_params.projection_weights(), &projection_eff_bias_info,
842  GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)));
843  if (lstm_params.projection_bias() != nullptr)
844  {
 846  ARM_COMPUTE_RETURN_ON_ERROR(
 847  CLArithmeticAddition::validate(lstm_params.projection_bias(), &projection_eff_bias_info,
848  &projection_eff_bias_info, ConvertPolicy::SATURATE));
849  }
850  }
851 
852  const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1,
853  input_to_forget_weights->data_type(),
854  input_to_forget_weights->quantization_info());
855  const TensorInfo recurrent_weights_transposed(TensorShape(num_units, output_size), 1,
856  recurrent_to_forget_weights->data_type(),
857  recurrent_to_forget_weights->quantization_info());
858 
 859  // Validate weights transpose
 860  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_forget_weights, &input_weights_transposed));
 861  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_cell_weights, &input_weights_transposed));
 862  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_output_weights, &input_weights_transposed));
 863  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_forget_weights, &recurrent_weights_transposed));
 864  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_cell_weights, &recurrent_weights_transposed));
 865  ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_output_weights, &recurrent_weights_transposed));
 866  if (!lstm_params.has_cifg_opt())
 867  {
 868  ARM_COMPUTE_RETURN_ON_ERROR(
 869  CLTranspose::validate(lstm_params.input_to_input_weights(), &input_weights_transposed));
 870  ARM_COMPUTE_RETURN_ON_ERROR(
 871  CLTranspose::validate(lstm_params.recurrent_to_input_weights(), &recurrent_weights_transposed));
872  }
873  if (lstm_params.has_projection())
874  {
875  const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1,
876  lstm_params.projection_weights()->data_type(),
877  lstm_params.projection_weights()->quantization_info());
 878  ARM_COMPUTE_RETURN_ON_ERROR(
 879  CLTranspose::validate(lstm_params.projection_weights(), &projection_weights_transposed));
880  }
881 
 882  GEMMLowpOutputStageInfo gemmlowp_info;
 883  gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
 884  gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
 885  gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
 886  gemmlowp_info.output_data_type = DataType::QSYMM16;
887 
888  const bool has_layer_norm = lstm_params.use_layer_norm();
889 
 890  // Forget gate.
 891  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_intermediate_scale() == 0);
892  const TensorInfo forget_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
893  QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
894  const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
895  const float input_to_forget_scale = input_to_forget_weights->quantization_info().uniform().scale * qinput.scale /
896  lstm_params.forget_intermediate_scale();
897  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info,
898  input_to_forget_scale, &mm_out_info, &forget_outstage_info));
899 
900  const float recurrent_to_forget_scale = recurrent_to_forget_weights->quantization_info().uniform().scale *
901  qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
902  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed,
903  &eff_bias_info, recurrent_to_forget_scale, &mm_out_info,
904  &forget_outstage_info));
905 
906  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info,
907  &forget_outstage_info, ConvertPolicy::SATURATE));
908 
909  if (lstm_params.has_peephole_opt())
910  {
 913  ARM_COMPUTE_RETURN_ON_ERROR(
 914  CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &mm_out_info, 1.f,
 915  ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
916  const float cell_to_forget_scale = std::pow(2, cell_shift) *
917  lstm_params.cell_to_forget_weights()->quantization_info().uniform().scale /
918  lstm_params.forget_intermediate_scale();
 919  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(
 920  cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
 921  ARM_COMPUTE_RETURN_ON_ERROR(
 922  CLGEMMLowpOutputStage::validate(&mm_out_info, nullptr, &forget_outstage_info, gemmlowp_info));
923  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info,
924  &forget_outstage_info, ConvertPolicy::SATURATE));
925  }
926 
927  if (has_layer_norm)
928  {
929  const ITensorInfo *w_info = lstm_params.forget_layer_norm_weights();
930  const ITensorInfo *b_info = forget_gate_bias;
931  ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(forget_outstage_info, *w_info, *b_info));
932  }
933 
934  // Output quantization info of Sigmoid and Tanh activations
935  const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
936 
937  const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
 938  ARM_COMPUTE_RETURN_ON_ERROR(
 939  CLActivationLayer::validate(&forget_outstage_info, &forget_gate_info,
940  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
941 
 942  // Modulation gate.
 943  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_intermediate_scale() == 0);
944  const TensorInfo cell_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
945  QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
946  const float input_to_cell_scale = input_to_cell_weights->quantization_info().uniform().scale * qinput.scale /
947  lstm_params.cell_intermediate_scale();
948  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info,
949  input_to_cell_scale, &mm_out_info, &cell_outstage_info));
950 
951  const float recurrent_to_cell_scale = recurrent_to_cell_weights->quantization_info().uniform().scale *
952  qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
953  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed,
954  &eff_bias_info, recurrent_to_cell_scale, &mm_out_info,
955  &cell_outstage_info));
956 
957  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_outstage_info, &cell_outstage_info,
958  &cell_outstage_info, ConvertPolicy::SATURATE));
959 
960  if (has_layer_norm)
961  {
962  const ITensorInfo *w_info = lstm_params.cell_layer_norm_weights();
963  const ITensorInfo *b_info = cell_bias;
964  ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
965  }
966 
967  const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
 968  ARM_COMPUTE_RETURN_ON_ERROR(
 969  CLActivationLayer::validate(&cell_outstage_info, &cell_gate_info,
970  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
971 
972  // Input gate.
973  const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
974  if (lstm_params.has_cifg_opt())
975  {
976  ARM_COMPUTE_RETURN_ERROR_ON_MSG(lstm_params.input_gate_bias() != nullptr,
977  "Input gate bias must not be present when CIFG is used");
978  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticSubtraction::validate(&input_gate_info, &forget_gate_info,
979  &forget_gate_info, ConvertPolicy::SATURATE));
980  }
981  else
982  {
984  lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
989  lstm_params.recurrent_to_input_weights());
992 
 993  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_intermediate_scale() == 0);
 994  const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
995  QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
996  const float input_to_input_scale = lstm_params.input_to_input_weights()->quantization_info().uniform().scale *
997  qinput.scale / lstm_params.input_intermediate_scale();
998  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info,
999  input_to_input_scale, &mm_out_info, &input_outstage_info));
1000 
1001  const float recurrent_to_input_scale =
1002  lstm_params.recurrent_to_input_weights()->quantization_info().uniform().scale * qoutput_state_in.scale /
1003  lstm_params.input_intermediate_scale();
1004  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed,
1005  &eff_bias_info, recurrent_to_input_scale, &mm_out_info,
1006  &input_outstage_info));
1007 
1008  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_outstage_info, &input_outstage_info,
1009  &input_outstage_info, ConvertPolicy::SATURATE));
1010 
1011  if (lstm_params.has_peephole_opt())
1012  {
 1013  ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
 1014  cell_state_in, lstm_params.cell_to_input_weights(), &mm_out_info,
 1015  1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
1016  const float cell_to_input_scale = std::pow(2, cell_shift) *
1017  lstm_params.cell_to_input_weights()->quantization_info().uniform().scale /
1018  lstm_params.input_intermediate_scale();
 1019  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(
 1020  cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
 1021  ARM_COMPUTE_RETURN_ON_ERROR(
 1022  CLGEMMLowpOutputStage::validate(&mm_out_info, &eff_bias_info, &input_outstage_info, gemmlowp_info));
1023  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_outstage_info, &input_outstage_info,
1024  &input_outstage_info, ConvertPolicy::SATURATE));
1025  }
1026 
1027  if (has_layer_norm)
1028  {
1029  const ITensorInfo *w_info = lstm_params.input_layer_norm_weights();
1030  const ITensorInfo *b_info = lstm_params.input_gate_bias();
1031  ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
1032  }
1033 
 1034  ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
 1035  &input_outstage_info, &input_gate_info,
1036  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 1.f, 1.f)));
1037  }
1038  // Cell.
 1039  ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
 1040  &forget_gate_info, cell_state_in, &forget_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
 1041  ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
 1042  &input_gate_info, cell_state_in, &cell_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
 1043  ARM_COMPUTE_RETURN_ON_ERROR(
 1044  CLArithmeticAddition::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
1045  if (quantized_cell_clip > 0)
1046  {
 1047  ARM_COMPUTE_RETURN_ON_ERROR(
 1048  CLActivationLayer::validate(cell_state_out, nullptr,
1049  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
1050  -quantized_cell_clip, quantized_cell_clip)));
1051  }
 1052  // Output gate.
 1053  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_intermediate_scale() == 0);
1054  const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
1055  QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
1056  const float input_to_output_scale = input_to_output_weights->quantization_info().uniform().scale * qinput.scale /
1057  lstm_params.output_intermediate_scale();
1058  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info,
1059  input_to_output_scale, &mm_out_info, &output_outstage_info));
1060 
1061  const float recurrent_to_output_scale = recurrent_to_output_weights->quantization_info().uniform().scale *
1062  qoutput_state_in.scale / lstm_params.output_intermediate_scale();
1063  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed,
1064  &eff_bias_info, recurrent_to_output_scale, &mm_out_info,
1065  &output_outstage_info));
1066 
1067  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_outstage_info, &output_outstage_info,
1068  &output_outstage_info, ConvertPolicy::SATURATE));
1069  if (lstm_params.has_peephole_opt())
1070  {
1073  // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplicationKernel
1074  // Here we are not using the output stage because all operations are done in float
1075  // const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
1076  // ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
 1077  ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
 1078  cell_state_out, lstm_params.cell_to_output_weights(), &output_outstage_info, 1.f, ConvertPolicy::SATURATE,
 1079  RoundingPolicy::TO_ZERO));
1080  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_outstage_info, &output_outstage_info,
1081  &output_outstage_info, ConvertPolicy::SATURATE));
1082  }
1083 
1084  if (has_layer_norm)
1085  {
1086  const ITensorInfo *w_info = lstm_params.output_layer_norm_weights();
1087  const ITensorInfo *b_info = output_gate_bias;
1088  ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(output_outstage_info, *w_info, *b_info));
1089  }
1090 
1091  const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
 1092  ARM_COMPUTE_RETURN_ON_ERROR(
 1093  CLActivationLayer::validate(&output_outstage_info, &output_gate_info,
1094  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
1095 
1096  // Hidden.
 1097  ARM_COMPUTE_RETURN_ON_ERROR(
 1098  CLActivationLayer::validate(cell_state_out, &input_gate_info,
1099  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
1100  const TensorInfo hidden_mul_res(TensorShape(num_units, batch_size), 1, DataType::S32);
1101  const TensorInfo hidden_out_info(TensorShape(num_units, batch_size), 1, DataType::QASYMM8_SIGNED);
1102 
1103  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.hidden_state_scale() == 0);
 1104  ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
 1105  &output_gate_info, &input_gate_info, &hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
1106  const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
 1107  ARM_COMPUTE_RETURN_ON_ERROR(
 1108  quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier,
1109  &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true));
1110  gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
1111  gemmlowp_info.output_data_type = hidden_out_info.data_type();
 1112  ARM_COMPUTE_RETURN_ON_ERROR(
 1113  CLGEMMLowpOutputStage::validate(&hidden_mul_res, nullptr, &hidden_out_info, gemmlowp_info));
1114 
1115  const bool projection_tensor_copy_required = num_units != output_size;
1116 
1117  // Projection.
1118  if (lstm_params.has_projection())
1119  {
1121  lstm_params.projection_weights());
1122  ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
1123 
1124  const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
1125  const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
 1126  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(
 1127  projection_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
 1128  gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
 1129  gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
 1130  gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
 1131  gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
1132 
1133  const TensorInfo projection_outstage_info(*output_state_out);
1134  const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1,
1135  lstm_params.projection_weights()->data_type(),
1136  lstm_params.projection_weights()->quantization_info());
1137 
1138  TensorInfo projection_mm_out_info{mm_out_info};
1139  projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
1140 
1141  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed,
1142  &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
1143  &projection_outstage_info));
1144 
1145  if (projection_tensor_copy_required)
1146  {
 1147  ARM_COMPUTE_RETURN_ON_ERROR(
 1148  CLQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
1149  }
1150 
 1151  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(output_state_out, output_state_out, output_state_out,
 1152  ConvertPolicy::SATURATE));
1153 
1154  if (projection_tensor_copy_required)
1155  {
 1156  ARM_COMPUTE_RETURN_ON_ERROR(
 1157  CLQLSTMLayer::TensorCopyKernel::validate(projection_outstage_info, *output_state_out));
1158  }
1159 
1160  int8_t quantized_projection_clip{0};
1161  if (lstm_params.projection_clip() > 0.0f)
1162  {
1163  quantized_projection_clip = quantize_qasymm8_signed(lstm_params.projection_clip(), qprojection);
1164  }
1165 
1166  if (quantized_projection_clip > 0)
1167  {
 1168  ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
 1169  output_state_out, nullptr,
1170  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
1171  -quantized_projection_clip, quantized_projection_clip)));
1172  }
1173  }
1174  else
1175  {
1176  if (projection_tensor_copy_required)
1177  {
1178  ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(hidden_out_info, *output_state_out));
1179  }
1180  }
1181 
1182  if (cell_state_out->total_size() > 0)
1183  {
1184  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(cell_state_in, cell_state_out);
1185  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(cell_state_in, cell_state_out);
1186  }
1187 
1188  if (output_state_out->total_size() > 0)
1189  {
 1190  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(output_state_in, output_state_out);
 1191  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
1192  }
1193 
1194  ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(output_state_out, output));
1195  return Status{};
1196 }
1197 
 1198 void CLQLSTMLayer::run()
 1199 {
1200  prepare();
1201 
1202  // Acquire all the temporaries
1203  MemoryGroupResourceScope scope_mg(_memory_group);
1204 
1205  // Forget gate.
1206  _mm_input_to_forget.run();
1207  _input_to_forget_outstage.run();
1208 
1209  _mm_recurrent_to_forget.run();
1210  _recurrent_to_forget_outstage.run();
1211  _accumulate_input_recurrent_forget.run();
1212 
1213  if (_has_peephole)
1214  {
1215  _pixelwise_mul_cell_to_forget.run();
1216  _cell_to_forget_outstage.run();
1217  _accumulate_cell_forget.run();
1218  }
1219 
1220  if (_has_layer_norm)
1221  {
1222  CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Forget));
1223  }
1224 
1225  _forget_gate_sigmoid.run();
1226 
1227  // Modulation gate.
1228  _mm_input_to_cell.run();
1229  _input_to_cell_outstage.run();
1230 
1231  _mm_recurrent_to_cell.run();
1232  _recurrent_to_cell_outstage.run();
1233  _accumulate_input_recurrent_modulation.run();
1234 
1235  if (_has_layer_norm)
1236  {
1237  CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Cell));
1238  }
1239 
1240  _cell_gate_tanh.run();
1241 
1242  // Input gate
1243  if (_has_cifg)
1244  {
1245  _input_gate_sub.run();
1246  }
1247  else
1248  {
1249  _mm_input_to_input.run();
1250  _input_to_input_outstage.run();
1251  _mm_recurrent_to_input.run();
1252  _recurrent_to_input_outstage.run();
1253  _accumulate_input_recurrent_input.run();
1254 
1255  if (_has_peephole)
1256  {
1257  _pixelwise_mul_cell_to_input.run();
1258  _cell_to_input_outstage.run();
1259  _accumulate_cell_input.run();
1260  }
1261 
1262  if (_has_layer_norm)
1263  {
1264  CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Input));
1265  }
1266 
1267  _input_gate_sigmoid.run();
1268  }
1269 
1270  // Cell.
1271  _pixelwise_mul_forget_cell.run();
1272  _pixelwise_mul_input_cell.run();
1273  _add_forget_cell.run();
1274  if (_has_cell_clipping)
1275  {
1276  _cell_clip.run();
1277  }
1278 
1279  // Output gate.
1280  _mm_input_to_output.run();
1281  _input_to_output_outstage.run();
1282  _mm_recurrent_to_output.run();
1283  _recurrent_to_output_outstage.run();
1284  _accumulate_input_recurrent_output.run();
1285  if (_has_peephole)
1286  {
1287  _pixelwise_mul_cell_to_output.run();
1288  _cell_to_output_outstage.run();
1289  _accumulate_cell_to_output.run();
1290  }
1291 
1292  if (_has_layer_norm)
1293  {
1294  CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Output));
1295  }
1296 
1297  _output_gate_sigmoid.run();
1298 
1299  // Hidden.
1300  _hidden_tanh.run();
1301  _pixelwise_mul_hidden.run();
1302  _hidden_outstage.run();
1303 
1304  // Projection.
1305  if (_has_projection)
1306  {
1307  _mm_projection.run();
1308  _projection_outstage.run();
1309 
1310  if (_projection_tensor_copy_required)
1311  {
1312  _projection_output_to_accumulate_copy.run();
1313  }
1314 
1315  _accumulate_projection.run();
1316 
1317  if (_projection_tensor_copy_required)
1318  {
1319  _projection_accumulate_to_output_copy.run();
1320  }
1321 
1322  if (_has_projection_clipping)
1323  {
1324  _projection_clip.run();
1325  }
1326  }
1327  else
1328  {
1329  if (_projection_tensor_copy_required)
1330  {
1331  _hidden_to_output_copy.run();
1332  }
1333  }
1334 
1335  // Copy output_state_out to output
1336  _copy_output.run();
1337 }
1338 
1339 void CLQLSTMLayer::prepare()
1340 {
1341  if (!_is_prepared)
1342  {
1343  // Pre-transpose weights to be used in GEMM.
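    // Transposing the weight matrices once here lets every subsequent run() feed them straight
    // into the GEMMLowp cores without repeating the transposition.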
1344  _input_to_forget_weights_transposed.allocator()->allocate();
1345  _input_to_cell_weights_transposed.allocator()->allocate();
1346  _input_to_output_weights_transposed.allocator()->allocate();
1347  _recurrent_to_forget_weights_transposed.allocator()->allocate();
1348  _recurrent_to_cell_weights_transposed.allocator()->allocate();
1349  _recurrent_to_output_weights_transposed.allocator()->allocate();
1350  _transpose_input_to_forget_weights.run();
1351  _transpose_input_to_cell_weights.run();
1352  _transpose_input_to_output_weights.run();
1353  _transpose_recurrent_to_forget_weights.run();
1354  _transpose_recurrent_to_cell_weights.run();
1355  _transpose_recurrent_to_output_weights.run();
1356 
1357  // Precompute effective biases
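    // Each "effective bias" is the row-sum reduction of a weight matrix scaled by the negated
    // zero point of the operand it multiplies (input or output state); folding this GEMMLowp
    // offset-correction term into a bias vector once avoids recomputing it on every run().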
1358  if (_has_cifg)
1359  {
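    // 32767 is the largest positive QSYMM16 value and stands for approximately 1.0 at the
    // gates' 2^-15 scale, so _ones provides the constant "1" used in i_t = 1 - f_t.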
1360  _ones.map(true);
1361  std::fill_n(reinterpret_cast<int16_t *>(_ones.buffer()),
1362  _ones.info()->total_size() / _ones.info()->element_size(), 32767);
1363  _ones.unmap();
1364  }
1365  else
1366  {
1367  _input_to_input_eff_bias.allocator()->allocate();
1368  _recurrent_to_input_eff_bias.allocator()->allocate();
1369 
1370  ITensorPack input_to_input_red_pack = {{ACL_SRC, _input_to_input_weights},
1371  {ACL_DST, &_input_to_input_eff_bias}};
1372  CLScheduler::get().enqueue_op(*_input_to_input_reduction, input_to_input_red_pack, false);
1373 
1374  ITensorPack rec_to_input_red_pack = {{ACL_SRC, _recurrent_to_input_weights},
1375  {ACL_DST, &_recurrent_to_input_eff_bias}};
1376  CLScheduler::get().enqueue_op(*_recurrent_to_input_reduction, rec_to_input_red_pack, false);
1377 
1378  _input_to_input_weights_transposed.allocator()->allocate();
1379  _recurrent_to_input_weights_transposed.allocator()->allocate();
1380  _transpose_input_to_input_weights.run();
1381  _transpose_recurrent_to_input_weights.run();
1382  _input_to_input_weights->mark_as_unused();
1383  _recurrent_to_input_weights->mark_as_unused();
1384  }
1385  _input_to_forget_eff_bias.allocator()->allocate();
1386  _recurrent_to_forget_eff_bias.allocator()->allocate();
1387  _input_to_cell_eff_bias.allocator()->allocate();
1388  _recurrent_to_cell_eff_bias.allocator()->allocate();
1389  _input_to_output_eff_bias.allocator()->allocate();
1390  _recurrent_to_output_eff_bias.allocator()->allocate();
1391 
1392  ITensorPack input_to_forget_red_pack = {{ACL_SRC, _input_to_forget_weights},
1393  {ACL_DST, &_input_to_forget_eff_bias}};
1394  CLScheduler::get().enqueue_op(*_input_to_forget_reduction, input_to_forget_red_pack, false);
1395 
1396  ITensorPack rec_to_forget_red_pack = {{ACL_SRC, _recurrent_to_forget_weights},
1397  {ACL_DST, &_recurrent_to_forget_eff_bias}};
1398  CLScheduler::get().enqueue_op(*_recurrent_to_forget_reduction, rec_to_forget_red_pack, false);
1399 
1400  ITensorPack input_to_cell_red_pack = {{ACL_SRC, _input_to_cell_weights}, {ACL_DST, &_input_to_cell_eff_bias}};
1401  CLScheduler::get().enqueue_op(*_input_to_cell_reduction, input_to_cell_red_pack, false);
1402 
1403  ITensorPack rec_to_cell_red_pack = {{ACL_SRC, _recurrent_to_cell_weights},
1404  {ACL_DST, &_recurrent_to_cell_eff_bias}};
1405  CLScheduler::get().enqueue_op(*_recurrent_to_cell_reduction, rec_to_cell_red_pack, false);
1406 
1407  ITensorPack input_to_output_red_pack = {{ACL_SRC, _input_to_output_weights},
1408  {ACL_DST, &_input_to_output_eff_bias}};
1409  CLScheduler::get().enqueue_op(*_input_to_output_reduction, input_to_output_red_pack, false);
1410 
1411  ITensorPack rec_to_output_red_pack = {{ACL_SRC, _recurrent_to_output_weights},
1412  {ACL_DST, &_recurrent_to_output_eff_bias}};
1413  CLScheduler::get().enqueue_op(*_recurrent_to_output_reduction, rec_to_output_red_pack, false);
1414 
1415  if (_has_projection)
1416  {
1417  _projection_eff_bias.allocator()->allocate();
1418  ITensorPack proj_red_pack{{ACL_SRC, _projection_weights}, {ACL_DST, &_projection_eff_bias}};
1419  CLScheduler::get().enqueue_op(*_projection_reduction, proj_red_pack, false);
1420  if (_projection_bias != nullptr)
1421  {
1422  _projection_bias_add.run();
1423  _projection_bias->mark_as_unused();
1424  }
1425 
1426  _projection_weights_transposed.allocator()->allocate();
1427  _transpose_projection_weights.run();
1428  _projection_weights->mark_as_unused();
1429 
1430  if (!_projection_tensor_copy_required)
1431  {
1432  _hidden_gate.mark_as_unused();
1433  _projection_accumulate_res.mark_as_unused();
1434  }
1435  }
1436 
1437  // Mark weights as unused
1438  _input_to_forget_weights->mark_as_unused();
1439  _input_to_cell_weights->mark_as_unused();
1440  _input_to_output_weights->mark_as_unused();
1441  _recurrent_to_forget_weights->mark_as_unused();
1442  _recurrent_to_cell_weights->mark_as_unused();
1443  _recurrent_to_output_weights->mark_as_unused();
1444 
1445  CLScheduler::get().queue().finish();
1446  _is_prepared = true;
1447  }
1448 }
1449 
1450 } // namespace arm_compute