CLLSTMLayer.cpp
/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"

#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

#include "src/common/utils/Log.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/gpu/cl/kernels/ClTransposeKernel.h"

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::info_helpers;

CLLSTMLayer::CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _fully_connected_input_gate(),
      _accum_input_gate1(),
      _subtract_input_gate(),
      _pixelwise_mul_input_gate(),
      _activation_input_gate(),
      _fully_connected_forget_gate(),
      _accum_forget_gate1(),
      _pixelwise_mul_forget_gate(),
      _activation_forget_gate(),
      _fully_connected_cell_state(),
      _gemm_cell_state1(),
      _transpose_cell_state(std::make_unique<opencl::kernels::ClTransposeKernel>()),
      _accum_cell_state1(),
      _accum_cell_state2(),
      _pixelwise_mul_cell_state1(),
      _activation_cell_state(),
      _cell_clip(),
      _pixelwise_mul_cell_state2(),
      _fully_connected_output(),
      _pixelwise_mul_output_state1(),
      _accum_output1(),
      _activation_output(),
      _activation_output_state(),
      _pixelwise_mul_output_state2(),
      _fully_connected_output_state(),
      _projection_clip(),
      _copy_cell_state(),
      _copy_output(),
      _concat_scratch_buffer(),
      _concat_inputs_forget_gate(),
      _concat_weights_forget_gate(),
      _concat_weights_input_gate(),
      _concat_weights_output(),
      _ones_fill(),
      _mean_std_norm_input_gate(),
      _pixelwise_mul_input_gate_coeff(),
      _accum_input_gate_bias(),
      _mean_std_norm_forget_gate(),
      _pixelwise_mul_forget_gate_coeff(),
      _accum_forget_gate_bias(),
      _mean_std_norm_cell_gate(),
      _pixelwise_mul_cell_gate_coeff(),
      _accum_cell_gate_bias(),
      _mean_std_norm_output_gate(),
      _pixelwise_mul_output_gate_coeff(),
      _accum_output_gate_bias(),
      _input_gate_out1(),
      _input_gate_out2(),
      _input_gate_out3(),
      _input_gate_out4(),
      _forget_gate_out1(),
      _forget_gate_out2(),
      _forget_gate_out3(),
      _forget_gate_out4(),
      _forget_gate_out5(),
      _forget_gate_out6(),
      _cell_state_out1(),
      _cell_state_out2(),
      _cell_state_out3(),
      _cell_state_out4(),
      _cell_state_out5(),
      _output1(),
      _output2(),
      _output3(),
      _output4(),
      _cell_state_activation(),
      _output_state1(),
      _ones(),
      _input_layer_norm_out1(),
      _input_layer_norm_out2(),
      _forget_layer_norm_out1(),
      _forget_layer_norm_out2(),
      _cell_layer_norm_out1(),
      _cell_layer_norm_out2(),
      _output_layer_norm_out1(),
      _output_layer_norm_out2(),
      _run_peephole_opt(false),
      _run_cifg_opt(false),
      _perform_cell_clipping(false),
      _has_projection_weights(false),
      _perform_projection_clipping(false),
      _is_prepared(false),
      _is_layer_norm_lstm(false)
{
}

CLLSTMLayer::~CLLSTMLayer() = default;

void CLLSTMLayer::configure(const ICLTensor             *input,
                            const ICLTensor             *input_to_forget_weights,
                            const ICLTensor             *input_to_cell_weights,
                            const ICLTensor             *input_to_output_weights,
                            const ICLTensor             *recurrent_to_forget_weights,
                            const ICLTensor             *recurrent_to_cell_weights,
                            const ICLTensor             *recurrent_to_output_weights,
                            const ICLTensor             *forget_gate_bias,
                            const ICLTensor             *cell_bias,
                            const ICLTensor             *output_gate_bias,
                            const ICLTensor             *output_state_in,
                            ICLTensor                   *cell_state_in,
                            ICLTensor                   *scratch_buffer,
                            ICLTensor                   *output_state_out,
                            ICLTensor                   *cell_state_out,
                            ICLTensor                   *output,
                            const LSTMParams<ICLTensor> &lstm_params,
                            const ActivationLayerInfo   &activation_info,
                            float                        cell_threshold,
                            float                        projection_threshold)
{
    // Forward to the variant that takes an explicit compile context, using the default one
    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights,
              input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
              recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, output_state_in,
              cell_state_in, scratch_buffer, output_state_out, cell_state_out, output, lstm_params, activation_info,
              cell_threshold, projection_threshold);
}

void CLLSTMLayer::configure(const CLCompileContext      &compile_context,
                            const ICLTensor             *input,
                            const ICLTensor             *input_to_forget_weights,
                            const ICLTensor             *input_to_cell_weights,
                            const ICLTensor             *input_to_output_weights,
                            const ICLTensor             *recurrent_to_forget_weights,
                            const ICLTensor             *recurrent_to_cell_weights,
                            const ICLTensor             *recurrent_to_output_weights,
                            const ICLTensor             *forget_gate_bias,
                            const ICLTensor             *cell_bias,
                            const ICLTensor             *output_gate_bias,
                            const ICLTensor             *output_state_in,
                            ICLTensor                   *cell_state_in,
                            ICLTensor                   *scratch_buffer,
                            ICLTensor                   *output_state_out,
                            ICLTensor                   *cell_state_out,
                            ICLTensor                   *output,
                            const LSTMParams<ICLTensor> &lstm_params,
                            const ActivationLayerInfo   &activation_info,
                            float                        cell_threshold,
                            float                        projection_threshold)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                                 forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in,
                                 scratch_buffer, output_state_out, cell_state_out, output);

    ARM_COMPUTE_LOG_PARAMS(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                           recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                           forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in,
                           scratch_buffer, output_state_out, cell_state_out, output, lstm_params, activation_info,
                           cell_threshold, projection_threshold);

    _is_layer_norm_lstm = lstm_params.use_layer_norm();

    // Set lstm parameters
    LSTMParams<ITensorInfo> lstm_params_info{};
    build_lstm_params_tensor_info(lstm_params, &lstm_params_info);

    // Validate
    ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayer::validate(
        input->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
        recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
        forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(), output_state_in->info(),
        cell_state_in->info(), scratch_buffer->info(), output_state_out->info(), cell_state_out->info(), output->info(),
        lstm_params_info, activation_info, cell_threshold, projection_threshold));

    const TensorShape cell_state_shape = cell_state_in->info()->tensor_shape();
    // Configure block that calculates the forget gate
    // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
    // We optimize this as follows:
    // forget_gate = Activation((input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
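    // With input of shape [num_inputs x num_batches] and output_state_in of shape [num_outputs x num_batches]
    // concatenated along DimX, and the two weight tensors concatenated the same way, a single fully
    // connected layer computes both matrix products in one pass.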
    _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));

    std::vector<const ICLTensor *> inputs_vector;
    inputs_vector.emplace_back(input);
    inputs_vector.emplace_back(output_state_in);
    const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
    _forget_gate_out2.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type()));

    _memory_group.manage(&_forget_gate_out2);
    _concat_inputs_forget_gate.configure(compile_context, inputs_vector, &_forget_gate_out2, Window::DimX);

    std::vector<const ICLTensor *> weights_vector;

    weights_vector.emplace_back(input_to_forget_weights);
    weights_vector.emplace_back(recurrent_to_forget_weights);
    const TensorShape weights_concat_shape =
        arm_compute::misc::shape_calculator::calculate_concatenate_shape(weights_vector, 0);
    _forget_gate_out6.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type()));

    _concat_weights_forget_gate.configure(compile_context, weights_vector, &_forget_gate_out6, Window::DimX);

    _memory_group.manage(&_forget_gate_out5);
    _fully_connected_forget_gate.configure(compile_context, &_forget_gate_out2, &_forget_gate_out6,
                                           (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
    _memory_group.manage(&_forget_gate_out1);
    _memory_group.manage(&_forget_gate_out3);
    _forget_gate_out6.allocator()->allocate();

    CLTensor *forget_gate_out = &_forget_gate_out5;
    if (lstm_params.has_peephole_opt())
    {
        _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));

        _run_peephole_opt = true;
        _memory_group.manage(&_forget_gate_out4);
        _pixelwise_mul_forget_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(),
                                             &_forget_gate_out4, 1, ConvertPolicy::SATURATE,
                                             RoundingPolicy::TO_NEAREST_EVEN);
        _accum_forget_gate1.configure(compile_context, &_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3,
                                      ConvertPolicy::SATURATE);
        _forget_gate_out4.allocator()->allocate();
        _forget_gate_out5.allocator()->allocate();
        forget_gate_out = &_forget_gate_out3;
    }
    else
    {
        _forget_gate_out3.allocator()->allocate();
    }
    if (_is_layer_norm_lstm)
    {
        _forget_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _forget_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _memory_group.manage(&_forget_layer_norm_out1);
        _memory_group.manage(&_forget_layer_norm_out2);
        _mean_std_norm_forget_gate.configure(compile_context, forget_gate_out);
        _pixelwise_mul_forget_gate_coeff.configure(compile_context, forget_gate_out,
                                                   lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1,
                                                   ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
        // forget_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
        forget_gate_out->allocator()->allocate();
        _accum_forget_gate_bias.configure(compile_context, &_forget_layer_norm_out1, forget_gate_bias,
                                          &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
        _forget_layer_norm_out1.allocator()->allocate();
        forget_gate_out = &_forget_layer_norm_out2;
    }
    _activation_forget_gate.configure(compile_context, forget_gate_out, nullptr,
                                      ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));

    // Configure block that calculates the input gate
    // input_gate = Activation(input * input_to_input_weights + output_state * recurrent_to_input_weights + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
    // input_gate = 1 - forget_gate, with CIFG
    // We optimize this as follows:
    // input_gate = Activation((input,output_state) * (input_to_input_weights,recurrent_to_input_weights) + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
    _input_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    CLTensor *input_gate_out = &_input_gate_out1;
    if (lstm_params.has_cifg_opt())
    {
        _memory_group.manage(&_input_gate_out1);
        _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _ones_fill.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
        _subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1,
                                       ConvertPolicy::SATURATE);
        _ones.allocator()->allocate();
        _run_cifg_opt = true;
    }
    else
    {
        _input_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _input_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));

        std::vector<const ICLTensor *> lstm_weights;
        lstm_weights.emplace_back(lstm_params.input_to_input_weights());
        lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
        TensorShape lstm_weights_concat_shape =
            arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
        _input_gate_out2.allocator()->init(TensorInfo(lstm_weights_concat_shape, 1, input->info()->data_type()));

        _concat_weights_input_gate.configure(compile_context, lstm_weights, &_input_gate_out2, Window::DimX);

        _memory_group.manage(&_input_gate_out1);

        _memory_group.manage(&_input_gate_out3);
        _fully_connected_input_gate.configure(compile_context, &_forget_gate_out2, &_input_gate_out2,
                                              (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(),
                                              &_input_gate_out3);
        _input_gate_out2.allocator()->allocate();

        input_gate_out = &_input_gate_out3;
        if (_run_peephole_opt)
        {
            _memory_group.manage(&_input_gate_out4);
            _pixelwise_mul_input_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(),
                                                &_input_gate_out4, 1, ConvertPolicy::SATURATE,
                                                RoundingPolicy::TO_NEAREST_EVEN);
            _accum_input_gate1.configure(compile_context, &_input_gate_out3, &_input_gate_out4, &_input_gate_out1,
                                         ConvertPolicy::SATURATE);
            _input_gate_out3.allocator()->allocate();
            _input_gate_out4.allocator()->allocate();
            input_gate_out = &_input_gate_out1;
        }
        else
        {
            _input_gate_out1.allocator()->allocate();
        }

        if (_is_layer_norm_lstm)
        {
            _input_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
            _input_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
            _memory_group.manage(&_input_layer_norm_out1);
            _memory_group.manage(&_input_layer_norm_out2);
            _mean_std_norm_input_gate.configure(compile_context, input_gate_out);
            _pixelwise_mul_input_gate_coeff.configure(compile_context, input_gate_out,
                                                      lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1,
                                                      1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
            // input_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
            input_gate_out->allocator()->allocate();
            _accum_input_gate_bias.configure(compile_context, &_input_layer_norm_out1, lstm_params.input_gate_bias(),
                                             &_input_layer_norm_out2, ConvertPolicy::SATURATE);
            _input_layer_norm_out1.allocator()->allocate();
            input_gate_out = &_input_layer_norm_out2;
        }
        _activation_input_gate.configure(compile_context, input_gate_out, nullptr,
                                         ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    }

    // Configure block that calculates the cell state
    // cell_state = Clip((PixelwiseMul(input_gate, Activation(input * input_to_cell_weights + output_state_in * recurrent_to_cell_weights + cell_bias)) + PixelwiseMul(forget_gate, cell_state)), cell_threshold)
    const TensorShape cell_state1_shape = compute_transposed_shape(*recurrent_to_cell_weights->info());
    _cell_state_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _cell_state_out2.allocator()->init(TensorInfo(cell_state1_shape, 1, input->info()->data_type()));
    _cell_state_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _cell_state_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _cell_state_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));

    _memory_group.manage(&_cell_state_out1);
    _fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights,
                                          (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
    _memory_group.manage(&_cell_state_out2);
    // The transpose runs as a raw kernel at execution time; keep the weights pointer for the tensor pack
    _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights->info(), _cell_state_out2.info());
    _recurrent_to_cell_weights = recurrent_to_cell_weights;
    _memory_group.manage(&_cell_state_out3);
    _gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f,
                                0.f);
    _cell_state_out2.allocator()->allocate();
    _memory_group.manage(&_cell_state_out4);
    _accum_cell_state1.configure(compile_context, &_cell_state_out1, &_cell_state_out3, &_cell_state_out4,
                                 ConvertPolicy::SATURATE);
    CLTensor *cell_state_out_ptr = &_cell_state_out4;
    if (_is_layer_norm_lstm)
    {
        _cell_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _cell_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _memory_group.manage(&_cell_layer_norm_out1);
        _memory_group.manage(&_cell_layer_norm_out2);
        _mean_std_norm_cell_gate.configure(compile_context, cell_state_out_ptr);
        _pixelwise_mul_cell_gate_coeff.configure(compile_context, cell_state_out_ptr,
                                                 lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1,
                                                 ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
        // cell_state_out_ptr is going to be reassigned, so allocate the tensor that it was assigned to before
        cell_state_out_ptr->allocator()->allocate();
        _accum_cell_gate_bias.configure(compile_context, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2,
                                        ConvertPolicy::SATURATE);
        _cell_layer_norm_out1.allocator()->allocate();
        cell_state_out_ptr = &_cell_layer_norm_out2;
    }
    _activation_cell_state.configure(compile_context, cell_state_out_ptr, nullptr, activation_info);
    _memory_group.manage(&_cell_state_out5);
    _pixelwise_mul_cell_state1.configure(compile_context, cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1,
                                         ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
    cell_state_out_ptr->allocator()->allocate();
    // _cell_state_out3 (the GEMM result, already consumed above) is reused as the second multiply's output
    _pixelwise_mul_cell_state2.configure(compile_context, forget_gate_out, cell_state_in, &_cell_state_out3, 1,
                                         ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
    _accum_cell_state2.configure(compile_context, &_cell_state_out5, &_cell_state_out3, &_cell_state_out1,
                                 ConvertPolicy::SATURATE);
    _cell_state_out3.allocator()->allocate();
    _cell_state_out5.allocator()->allocate();
    // Perform clipping: LU_BOUNDED_RELU clamps the cell state to [-cell_threshold, cell_threshold]
    if (cell_threshold != 0.f)
    {
        _perform_cell_clipping = true;
        _cell_clip.configure(compile_context, &_cell_state_out1, nullptr,
                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
                                                 cell_threshold, -cell_threshold));
    }

    // Configure block that calculates the output
    // output_state_out = Activation(input * input_to_output_weights + output_state_in * recurrent_to_output_weights + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
    // We optimize this as follows:
    // output_state_out = Activation((input,output_state_in) * (input_to_output_weights, recurrent_to_output_weights) + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
    _output1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _output4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    std::vector<const ICLTensor *> in_out_weights;
    in_out_weights.emplace_back(input_to_output_weights);
    in_out_weights.emplace_back(recurrent_to_output_weights);
    TensorShape in_out_weights_concat_shape =
        arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
    _output2.allocator()->init(TensorInfo(in_out_weights_concat_shape, 1, input->info()->data_type()));

    _concat_weights_output.configure(compile_context, in_out_weights, &_output2, Window::DimX);

    _memory_group.manage(&_output1);
    _memory_group.manage(&_output4);

    _fully_connected_output.configure(compile_context, &_forget_gate_out2, &_output2,
                                      (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);

    _output2.allocator()->allocate();
    _forget_gate_out2.allocator()->allocate();

    CLTensor *output_gate_out = &_output4;
    if (lstm_params.has_peephole_opt())
    {
        _output3.allocator()->init(TensorInfo(_cell_state_out1.info()->tensor_shape(), 1, input->info()->data_type()));

        _memory_group.manage(&_output3);
        _pixelwise_mul_output_state1.configure(compile_context, &_cell_state_out1, lstm_params.cell_to_output_weights(),
                                               &_output3, 1, ConvertPolicy::SATURATE,
                                               RoundingPolicy::TO_NEAREST_EVEN);
        _accum_output1.configure(compile_context, &_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
        _output4.allocator()->allocate();
        output_gate_out = &_output1;

        // Allocate intermediate buffers
        _output3.allocator()->allocate();
    }
    else
    {
        _output1.allocator()->allocate();
    }
    if (_is_layer_norm_lstm)
    {
        _output_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _output_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
        _memory_group.manage(&_output_layer_norm_out1);
        _memory_group.manage(&_output_layer_norm_out2);
        _mean_std_norm_output_gate.configure(compile_context, output_gate_out);
        _pixelwise_mul_output_gate_coeff.configure(compile_context, output_gate_out,
                                                   lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1,
                                                   ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
        // output_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
        output_gate_out->allocator()->allocate();
        _accum_output_gate_bias.configure(compile_context, &_output_layer_norm_out1, output_gate_bias,
                                          &_output_layer_norm_out2, ConvertPolicy::SATURATE);
        _output_layer_norm_out1.allocator()->allocate();
        output_gate_out = &_output_layer_norm_out2;
    }
    _activation_output.configure(compile_context, output_gate_out, nullptr,
                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));

    // Configure block that calculates the output state
    /** lstm_res = PixelwiseMul(output, Activation(cell_state))
     *
     *                -- Clip(lstm_res * projection_weights + projection_bias, projection_threshold) , if there is a projection
     *               /
     *  output_state =
     *               \
     *                -- lstm_res , otherwise
     */
    ICLTensor *output_state_out_tmp = lstm_params.has_projection() ? &_output_state1 : output_state_out;
    _cell_state_activation.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
    _output_state1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));

    _memory_group.manage(&_cell_state_activation);
    _activation_output_state.configure(compile_context, &_cell_state_out1, &_cell_state_activation, activation_info);
    _pixelwise_mul_output_state2.configure(compile_context, &_cell_state_activation, output_gate_out,
                                           output_state_out_tmp, 1, ConvertPolicy::SATURATE,
                                           RoundingPolicy::TO_NEAREST_EVEN);
    _cell_state_activation.allocator()->allocate();

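    // Without a projection layer the multiply above writes straight to output_state_out; with one, it
    // writes to the temporary _output_state1, which the projection layer below maps to output_state_out.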
    if (lstm_params.has_projection())
    {
        _has_projection_weights = true;
        _fully_connected_output_state.configure(compile_context, output_state_out_tmp, lstm_params.projection_weights(),
                                                lstm_params.projection_bias(), output_state_out);
        _output_state1.allocator()->allocate();
        // Perform clipping
        if (projection_threshold != 0.f)
        {
            _perform_projection_clipping = true;
            _projection_clip.configure(compile_context, output_state_out, nullptr,
                                       ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
                                                           -projection_threshold, projection_threshold));
        }
    }

    // Copy cell state and output
    _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
    _copy_output.configure(compile_context, output_state_out, output);

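    // The scratch buffer is the concatenation [input_gate (omitted with CIFG), cell_state, forget_gate,
    // output_gate] along DimX, which is why validate() expects its width to be 4 * num_cells
    // (3 * num_cells with CIFG).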
    // Vector for holding the tensors to store in scratch buffer
    std::vector<const ICLTensor *> scratch_inputs;
    if (!lstm_params.has_cifg_opt())
    {
        scratch_inputs.emplace_back(input_gate_out);
    }
    scratch_inputs.emplace_back(&_cell_state_out1);
    scratch_inputs.emplace_back(forget_gate_out);
    scratch_inputs.emplace_back(output_gate_out);
    _concat_scratch_buffer.configure(compile_context, scratch_inputs, scratch_buffer, Window::DimX);
    input_gate_out->allocator()->allocate();
    _cell_state_out1.allocator()->allocate();
    forget_gate_out->allocator()->allocate();
    output_gate_out->allocator()->allocate();
}

Status CLLSTMLayer::validate(const ITensorInfo             *input,
                             const ITensorInfo             *input_to_forget_weights,
                             const ITensorInfo             *input_to_cell_weights,
                             const ITensorInfo             *input_to_output_weights,
                             const ITensorInfo             *recurrent_to_forget_weights,
                             const ITensorInfo             *recurrent_to_cell_weights,
                             const ITensorInfo             *recurrent_to_output_weights,
                             const ITensorInfo             *forget_gate_bias,
                             const ITensorInfo             *cell_bias,
                             const ITensorInfo             *output_gate_bias,
                             const ITensorInfo             *output_state_in,
                             const ITensorInfo             *cell_state_in,
                             const ITensorInfo             *scratch_buffer,
                             const ITensorInfo             *output_state_out,
                             const ITensorInfo             *cell_state_out,
                             const ITensorInfo             *output,
                             const LSTMParams<ITensorInfo> &lstm_params,
                             const ActivationLayerInfo     &activation_info,
                             float                          cell_threshold,
                             float                          projection_threshold)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(
        input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights,
        recurrent_to_cell_weights, recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias,
        output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out, output);

    // Check data types
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(
        input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights,
        recurrent_to_cell_weights, recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias,
        output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out, output);

    // Check dimensions
    ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_to_forget_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_to_cell_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_forget_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_cell_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(output_gate_bias->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(scratch_buffer->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(output_state_out->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(cell_state_out->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
    // Scratch buffer width must match the number of gates stored: 4 * num_cells, or 3 * num_cells with CIFG
    ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->dimension(0) * 4 != scratch_buffer->dimension(0) &&
                                cell_bias->dimension(0) * 3 != scratch_buffer->dimension(0));

    const unsigned int num_batches = input->dimension(1);
    const unsigned int num_cells   = input_to_output_weights->dimension(1);

    if (lstm_params.use_layer_norm())
    {
        // If CIFG is used, input layer normalization weights tensor is omitted
        if (lstm_params.has_cifg_opt())
        {
            ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights() != nullptr);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights());
            ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->num_dimensions() > 1);
            ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->dimension(0) != num_cells);
        }

        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(),
                                            lstm_params.cell_layer_norm_weights(),
                                            lstm_params.output_layer_norm_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, lstm_params.forget_layer_norm_weights(),
                                                           lstm_params.cell_layer_norm_weights(),
                                                           lstm_params.output_layer_norm_weights());
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->dimension(0) != num_cells);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->dimension(0) != num_cells);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->dimension(0) != num_cells);
    }

    // Check peephole optimization
    if (lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_output_weights()->num_dimensions() > 1);
    }

    const TensorShape units_out_transposed_shape = compute_transposed_shape(*recurrent_to_output_weights);
    const TensorShape num_units_transposed_shape = compute_transposed_shape(*forget_gate_bias);
    const TensorInfo  units_out_transposed_info  = TensorInfo(units_out_transposed_shape, 1, input->data_type());
    const TensorInfo  num_units_transposed_info  = TensorInfo(num_units_transposed_shape, 1, input->data_type());

    TensorInfo input_gate      = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
    TensorInfo forget_gate     = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
    TensorInfo output_gate_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
    TensorInfo cell_state_tmp  = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());

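    // These TensorInfo temporaries, all of shape [num_cells x num_batches], stand in for the intermediate
    // gate buffers while the per-stage validators below run.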
    // Validate forget gate
    ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(
        input, input_to_forget_weights, (lstm_params.use_layer_norm()) ? nullptr : forget_gate_bias, &forget_gate));

    std::vector<const ITensorInfo *> inputs_vector;
    inputs_vector.emplace_back(input);
    inputs_vector.emplace_back(output_state_in);
    const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
    TensorInfo        forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type());

    ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX));

    if (lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &forget_gate, 1,
                                                ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLArithmeticAddition::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
    }
    if (lstm_params.use_layer_norm())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&forget_gate));
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLPixelWiseMultiplication::validate(&forget_gate, lstm_params.forget_layer_norm_weights(), &forget_gate, 1,
                                                ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE));
    }
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
        &forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));

    // Validate input gate
    if (!lstm_params.has_cifg_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(),
                                            lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_to_input_weights()->num_dimensions() > 2);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.recurrent_to_input_weights()->num_dimensions() > 2);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_gate_bias()->num_dimensions() > 1);

        std::vector<const ITensorInfo *> lstm_weights;
        lstm_weights.emplace_back(lstm_params.input_to_input_weights());
        lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
        TensorShape lstm_weights_concat_shape =
            arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
        TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type());
        ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX));

        ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(
            input, lstm_params.input_to_input_weights(),
            (lstm_params.use_layer_norm()) ? nullptr : lstm_params.input_gate_bias(), &input_gate));

        if (lstm_params.has_peephole_opt())
        {
            ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_input_weights());
            ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_input_weights()->num_dimensions() > 1);
            ARM_COMPUTE_RETURN_ON_ERROR(
                CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_input_weights(), &input_gate, 1,
                                                    ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
            ARM_COMPUTE_RETURN_ON_ERROR(
                CLArithmeticAddition::validate(&input_gate, &input_gate, &input_gate, ConvertPolicy::SATURATE));
        }

        if (lstm_params.use_layer_norm())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&input_gate));
            ARM_COMPUTE_RETURN_ON_ERROR(
                CLPixelWiseMultiplication::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate,
                                                    1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
            ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(),
                                                                       &input_gate, ConvertPolicy::SATURATE));
        }
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
            &input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
    }
    else
    {
        // With CIFG the input gate is derived from the forget gate: input_gate = 1 - forget_gate
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLArithmeticSubtraction::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
    }

    // Validate cell state
    ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(
        input, input_to_cell_weights, (lstm_params.use_layer_norm()) ? nullptr : cell_bias, &cell_state_tmp));
    ARM_COMPUTE_RETURN_ON_ERROR(
        CLGEMM::validate(output_state_in, &units_out_transposed_info, nullptr, &cell_state_tmp, 1.f, 0.f, GEMMInfo()));
    ARM_COMPUTE_RETURN_ON_ERROR(
        CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
    if (lstm_params.use_layer_norm())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&cell_state_tmp));
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLPixelWiseMultiplication::validate(&cell_state_tmp, lstm_params.cell_layer_norm_weights(), &cell_state_tmp,
                                                1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE));
    }
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, activation_info));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
        &cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
        &cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
    ARM_COMPUTE_RETURN_ON_ERROR(
        CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
    if (cell_threshold != 0.f)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLActivationLayer::validate(&cell_state_tmp, nullptr,
                                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
                                                            cell_threshold, -cell_threshold)));
    }

    std::vector<const ITensorInfo *> in_out_weights;
    in_out_weights.emplace_back(input_to_output_weights);
    in_out_weights.emplace_back(recurrent_to_output_weights);
    TensorShape in_out_weights_concat_shape =
        arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
    TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type());
    ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX));
    // Validate output gate tmp
    ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(
        input, input_to_output_weights, (lstm_params.use_layer_norm()) ? nullptr : output_gate_bias, &output_gate_tmp));

    if (lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(
            CLPixelWiseMultiplication::validate(&cell_state_tmp, lstm_params.cell_to_output_weights(), &output_gate_tmp,
                                                1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, &output_gate_tmp, &output_gate_tmp,
                                                                   ConvertPolicy::SATURATE));
    }
    if (lstm_params.use_layer_norm())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&output_gate_tmp));
        ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(
            &output_gate_tmp, lstm_params.output_layer_norm_weights(), &output_gate_tmp, 1, ConvertPolicy::SATURATE,
            RoundingPolicy::TO_NEAREST_EVEN));
        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp,
                                                                   ConvertPolicy::SATURATE));
    }
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
        &output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));

    // Validate output state
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp,
                                                                    1, ConvertPolicy::SATURATE,
                                                                    RoundingPolicy::TO_NEAREST_EVEN));
    if (lstm_params.has_projection())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(output_state_out, lstm_params.projection_weights(),
                                                                    lstm_params.projection_bias(), output_state_out));
        if (projection_threshold != 0.f)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(
                output_state_out, output_state_out,
                ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold,
                                    projection_threshold)));
        }
    }

    // Validate copy kernel
    ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(&cell_state_tmp, cell_state_out));
    ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(output_state_out, output));

    // Validate scratch concatenation
    std::vector<const ITensorInfo *> inputs_vector_info_raw;
    if (!lstm_params.has_cifg_opt())
    {
        inputs_vector_info_raw.push_back(&input_gate);
    }
    inputs_vector_info_raw.push_back(&cell_state_tmp);
    inputs_vector_info_raw.push_back(&forget_gate);
    inputs_vector_info_raw.push_back(&output_gate_tmp);

    ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
    return Status{};
}

void CLLSTMLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    _concat_inputs_forget_gate.run();

    _fully_connected_forget_gate.run();

    if (_run_peephole_opt)
    {
        _pixelwise_mul_forget_gate.run();
        _accum_forget_gate1.run();
    }
    if (_is_layer_norm_lstm)
    {
        _mean_std_norm_forget_gate.run();
        _pixelwise_mul_forget_gate_coeff.run();
        _accum_forget_gate_bias.run();
    }
    _activation_forget_gate.run();

    if (_run_cifg_opt)
    {
        _ones_fill.run();
        _subtract_input_gate.run();
    }
    else
    {
        _fully_connected_input_gate.run();

        if (_run_peephole_opt)
        {
            _pixelwise_mul_input_gate.run();
            _accum_input_gate1.run();
        }

        if (_is_layer_norm_lstm)
        {
            _mean_std_norm_input_gate.run();
            _pixelwise_mul_input_gate_coeff.run();
            _accum_input_gate_bias.run();
        }
        _activation_input_gate.run();
    }

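    // The recurrent-to-cell weights transpose is enqueued as a raw OpenCL kernel: unlike the managed
    // functions above, its operands are passed through an ITensorPack assembled at run time.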
    _fully_connected_cell_state.run();
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, _recurrent_to_cell_weights);
    pack.add_tensor(TensorType::ACL_DST, &_cell_state_out2);
    CLScheduler::get().enqueue_op(*_transpose_cell_state, pack, false);
    _gemm_cell_state1.run();
    _accum_cell_state1.run();
    if (_is_layer_norm_lstm)
    {
        _mean_std_norm_cell_gate.run();
        _pixelwise_mul_cell_gate_coeff.run();
        _accum_cell_gate_bias.run();
    }
    _activation_cell_state.run();
    _pixelwise_mul_cell_state1.run();
    _pixelwise_mul_cell_state2.run();
    _accum_cell_state2.run();

    if (_perform_cell_clipping)
    {
        _cell_clip.run();
    }

    _fully_connected_output.run();

    if (_run_peephole_opt)
    {
        _pixelwise_mul_output_state1.run();
        _accum_output1.run();
    }
    if (_is_layer_norm_lstm)
    {
        _mean_std_norm_output_gate.run();
        _pixelwise_mul_output_gate_coeff.run();
        _accum_output_gate_bias.run();
    }
    _activation_output.run();

    _activation_output_state.run();
    _pixelwise_mul_output_state2.run();

    if (_has_projection_weights)
    {
        _fully_connected_output_state.run();
        if (_perform_projection_clipping)
        {
            _projection_clip.run();
        }
    }

    _copy_cell_state.run();
    _copy_output.run();

    _concat_scratch_buffer.run();
}

void CLLSTMLayer::prepare()
{
    if (!_is_prepared)
    {
        // Concatenate the weight tensors once; subsequent runs reuse the concatenated buffers
        _concat_weights_forget_gate.run();
        if (!_run_cifg_opt)
        {
            _concat_weights_input_gate.run();
        }
        _concat_weights_output.run();
        _is_prepared = true;
    }
}
} // namespace arm_compute
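
For context, a minimal usage sketch of this function follows. It is not part of the file: the tensor shapes, the variable names, and the make helper are illustrative assumptions, and it relies on LSTMParams' defaults (CIFG enabled, no peephole, no projection, no layer normalisation).

// Illustrative sketch only; names and shapes below are assumptions, not taken from this file.
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // create a default OpenCL context and queue

    const unsigned int num_inputs = 8, num_cells = 16, num_batches = 1;

    // Hypothetical helper: initialise and allocate an F32 CLTensor of the given shape.
    auto make = [](CLTensor &t, const TensorShape &shape)
    {
        t.allocator()->init(TensorInfo(shape, 1, DataType::F32));
        t.allocator()->allocate();
    };

    CLTensor input, i2f_w, i2c_w, i2o_w, r2f_w, r2c_w, r2o_w, f_bias, c_bias, o_bias;
    CLTensor output_state_in, cell_state_in, scratch, output_state_out, cell_state_out, output;
    make(input, TensorShape(num_inputs, num_batches));
    for (CLTensor *t : {&i2f_w, &i2c_w, &i2o_w})
        make(*t, TensorShape(num_inputs, num_cells)); // input-to-gate weights
    for (CLTensor *t : {&r2f_w, &r2c_w, &r2o_w})
        make(*t, TensorShape(num_cells, num_cells)); // recurrent-to-gate weights
    for (CLTensor *t : {&f_bias, &c_bias, &o_bias})
        make(*t, TensorShape(num_cells)); // gate biases
    for (CLTensor *t : {&output_state_in, &cell_state_in, &output_state_out, &cell_state_out, &output})
        make(*t, TensorShape(num_cells, num_batches)); // state tensors
    make(scratch, TensorShape(num_cells * 3, num_batches)); // 3 gates concatenated with CIFG

    LSTMParams<ICLTensor> lstm_params; // defaults: CIFG on, no peephole/projection/layer norm

    CLLSTMLayer lstm;
    lstm.configure(&input, &i2f_w, &i2c_w, &i2o_w, &r2f_w, &r2c_w, &r2o_w, &f_bias, &c_bias, &o_bias,
                   &output_state_in, &cell_state_in, &scratch, &output_state_out, &cell_state_out, &output,
                   lstm_params, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));

    lstm.run(); // execute one LSTM time step
    CLScheduler::get().sync();
    return 0;
}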