Compute Library 20.08
NELSTMLayer.cpp
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/runtime/NEON/functions/NELSTMLayer.h"
25 
26 #include "arm_compute/core/Utils.h"
27 #include "arm_compute/core/Validate.h"
28 #include "arm_compute/core/utils/misc/InfoHelpers.h"
29 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
30 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
31 #include "arm_compute/runtime/common/LSTMParams.h"
32 
33 namespace arm_compute
34 {
35 using namespace arm_compute::misc::shape_calculator;
36 using namespace arm_compute::utils::info_helpers;
37 
38 NELSTMLayer::NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
39  : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
40  _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
41  _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
42  _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
43  _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
44  _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
45  _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
46  _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
47  _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
48  _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
49  _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
50  _is_layer_norm_lstm(false)
51 {
52 }
53 
54 void NELSTMLayer::configure(const ITensor *input,
55  const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
56  const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
57  const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
58  const ITensor *output_state_in, const ITensor *cell_state_in,
59  ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
60  const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
61 {
62  ARM_COMPUTE_ERROR_ON_NULLPTR(input,
63  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
64  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
65  forget_gate_bias, cell_bias, output_gate_bias,
66  output_state_in, cell_state_in,
67  scratch_buffer, output_state_out, cell_state_out, output);
68 
69  _is_layer_norm_lstm = lstm_params.use_layer_norm();
70 
71  // Set lstm parameters
72  LSTMParams<ITensorInfo> lstm_params_info{};
73  build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
74 
75  // Validate
76  ARM_COMPUTE_ERROR_THROW_ON(NELSTMLayer::validate(input->info(),
77  input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
78  recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
79  forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
80  output_state_in->info(), cell_state_in->info(),
81  scratch_buffer->info(), output_state_out->info(), cell_state_out->info(), output->info(),
82  lstm_params_info, activation_info, cell_threshold, projection_threshold));
83 
84  const TensorShape cell_state_shape = cell_state_in->info()->tensor_shape();
85 
86  // Configure block that calculates the forget gate
87  // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
88  // We optimize this as follows:
89  // forget_gate = Activation( (input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
90  _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
91  _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
92  _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
93 
94  std::vector<const ITensor *> inputs_vector;
95  inputs_vector.emplace_back(input);
96  inputs_vector.emplace_back(output_state_in);
97 
98  _memory_group.manage(&_forget_gate_out2);
99  _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2, Window::DimX);
100 
101  std::vector<const ITensor *> weights_vector;
102 
103  weights_vector.emplace_back(input_to_forget_weights);
104  weights_vector.emplace_back(recurrent_to_forget_weights);
105 
106  _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6, Window::DimX);
107 
108  _memory_group.manage(&_forget_gate_out5);
109  _fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
110  _memory_group.manage(&_forget_gate_out1);
111  _memory_group.manage(&_forget_gate_out3);
112  _forget_gate_out6.allocator()->allocate();
113 
114  Tensor *forget_gate_out = &_forget_gate_out5;
115  if(lstm_params.has_peephole_opt())
116  {
117  _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
118 
119  _run_peephole_opt = true;
120  _memory_group.manage(&_forget_gate_out4);
121  _pixelwise_mul_forget_gate.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
122  _accum_forget_gate1.configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
123  _forget_gate_out4.allocator()->allocate();
124  _forget_gate_out5.allocator()->allocate();
125  forget_gate_out = &_forget_gate_out3;
126  }
127  else
128  {
129  _forget_gate_out3.allocator()->allocate();
130  }
131  if(_is_layer_norm_lstm)
132  {
133  _forget_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
134  _forget_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
135  _memory_group.manage(&_forget_layer_norm_out1);
136  _memory_group.manage(&_forget_layer_norm_out2);
137  _mean_std_norm_forget_gate.configure(forget_gate_out);
138  _pixelwise_mul_forget_gate_coeff.configure(forget_gate_out, lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
139  // forget_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
140  forget_gate_out->allocator()->allocate();
141  _accum_forget_gate_bias.configure(&_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
142  _forget_layer_norm_out1.allocator()->allocate();
143  forget_gate_out = &_forget_layer_norm_out2;
144  }
145  _activation_forget_gate.configure(forget_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
146 
147  // Configure block that calculates the input gate
148  // input_gate = Activation(input * input_to_input_weights + output_state * recurrent_to_input_weights + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
149  // input_gate = 1 - forget_gate, with CIFG
150  // We optimize this as follows:
151  // input_gate = Activation((input,output_state) * (input_to_input_weights,recurrent_to_input_weights) + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
152  _input_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
153  Tensor *input_gate_out = &_input_gate_out1;
154  if(lstm_params.has_cifg_opt())
155  {
156  _memory_group.manage(&_input_gate_out1);
157  _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
158  _subtract_input_gate.configure(&_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
159  _ones.allocator()->allocate();
160  _run_cifg_opt = true;
161  }
162  else
163  {
164  _input_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
165  _input_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
166 
167  std::vector<const ITensor *> lstm_weights;
168  lstm_weights.emplace_back(lstm_params.input_to_input_weights());
169  lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
170 
171  _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2, Window::DimX);
172 
173  _memory_group.manage(&_input_gate_out1);
174  _memory_group.manage(&_input_gate_out4);
175 
176  _fully_connected_input_gate.configure(&_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3);
177  _input_gate_out2.allocator()->allocate();
178  input_gate_out = &_input_gate_out3;
179 
180  if(_run_peephole_opt)
181  {
182  _memory_group.manage(&_input_gate_out4);
183  _pixelwise_mul_input_gate.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_input_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
184  _accum_input_gate1.configure(&_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
185  _input_gate_out3.allocator()->allocate();
186  _input_gate_out4.allocator()->allocate();
187  input_gate_out = &_input_gate_out1;
188  }
189  else
190  {
191  _input_gate_out1.allocator()->allocate();
192  }
193 
194  if(_is_layer_norm_lstm)
195  {
196  _input_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
197  _input_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
198  _memory_group.manage(&_input_layer_norm_out1);
199  _memory_group.manage(&_input_layer_norm_out2);
200  _mean_std_norm_input_gate.configure(input_gate_out);
201  _pixelwise_mul_input_gate_coeff.configure(input_gate_out, lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
202  // input_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
203  input_gate_out->allocator()->allocate();
204  _accum_input_gate_bias.configure(&_input_layer_norm_out1, lstm_params.input_gate_bias(), &_input_layer_norm_out2, ConvertPolicy::SATURATE);
205  _input_layer_norm_out1.allocator()->allocate();
206  input_gate_out = &_input_layer_norm_out2;
207  }
208  _activation_input_gate.configure(input_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
209  }
210 
211  // Configure block that calculates the cell state
212  // cell_state = Clip((PixelwiseMul(input_gate, Activation(input * input_to_cell_weights + output_state_in * recurrent_to_cell_weights + cell_bias)) + PixelwiseMul(forget_gate, cell_state)), cell_threshold)
213  TensorShape cell_state1_shape = compute_transposed_shape(*recurrent_to_output_weights->info());
214  _cell_state_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
215  _cell_state_out2.allocator()->init(TensorInfo(cell_state1_shape, 1, input->info()->data_type()));
216  _cell_state_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
217  _cell_state_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
218  _cell_state_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
219 
220  _memory_group.manage(&_cell_state_out1);
221  _fully_connected_cell_state.configure(input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
222  _memory_group.manage(&_cell_state_out2);
223  _transpose_cell_state.configure(recurrent_to_cell_weights, &_cell_state_out2);
224  _memory_group.manage(&_cell_state_out3);
225  _gemm_cell_state1.configure(output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
226  _cell_state_out2.allocator()->allocate();
227  _memory_group.manage(&_cell_state_out4);
228  _accum_cell_state1.configure(&_cell_state_out1, &_cell_state_out3, &_cell_state_out4, ConvertPolicy::SATURATE);
229  Tensor *cell_state_out_ptr = &_cell_state_out4;
230  if(_is_layer_norm_lstm)
231  {
232  _cell_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
233  _cell_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
234  _memory_group.manage(&_cell_layer_norm_out1);
235  _memory_group.manage(&_cell_layer_norm_out2);
236  _mean_std_norm_cell_gate.configure(cell_state_out_ptr);
237  _pixelwise_mul_cell_gate_coeff.configure(cell_state_out_ptr, lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
238  // cell_state_out_ptr is going to be reassigned, so allocate the tensor that it was assigned to before
239  cell_state_out_ptr->allocator()->allocate();
240  _accum_cell_gate_bias.configure(&_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
241  _cell_layer_norm_out1.allocator()->allocate();
242  cell_state_out_ptr = &_cell_layer_norm_out2;
243  }
244  _activation_cell_state.configure(cell_state_out_ptr, nullptr, activation_info);
245  _memory_group.manage(&_cell_state_out5);
246  _pixelwise_mul_cell_state1.configure(cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
247  cell_state_out_ptr->allocator()->allocate();
248  _pixelwise_mul_cell_state2.configure(forget_gate_out, cell_state_in, &_cell_state_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
249  _accum_cell_state2.configure(&_cell_state_out5, &_cell_state_out3, &_cell_state_out1, ConvertPolicy::SATURATE);
250  _cell_state_out3.allocator()->allocate();
251  _cell_state_out5.allocator()->allocate();
252  // Perform clipping
253  if(cell_threshold != 0.f)
254  {
255  _perform_cell_clipping = true;
256  _cell_clip.configure(&_cell_state_out1, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
257  }
258 
259  // Configure block that calculates the output
260  // output_state_out = Activation(input * input_to_output_weights + output_state_in * recurrent_to_output_weights + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
261  // We optimize this as follows:
262  // output_state_out = Activation( (input,output_state_in) * (input_to_output_weights, recurrent_to_output_weights) + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
263  _output1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
264  _output4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
265 
266  std::vector<const ITensor *> in_out_weights;
267  in_out_weights.emplace_back(input_to_output_weights);
268  in_out_weights.emplace_back(recurrent_to_output_weights);
269 
270  _concat_weights_output.configure(in_out_weights, &_output2, Window::DimX);
271  _memory_group.manage(&_output1);
272  _memory_group.manage(&_output4);
273 
274  _fully_connected_output.configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);
275 
276  _output2.allocator()->allocate();
277  _forget_gate_out2.allocator()->allocate();
278 
279  Tensor *output_gate_out = &_output4;
280  if(lstm_params.has_peephole_opt())
281  {
282  _output3.allocator()->init(TensorInfo(_cell_state_out1.info()->tensor_shape(), 1, input->info()->data_type()));
283 
284  _memory_group.manage(&_output3);
285  _pixelwise_mul_output_state1.configure(&_cell_state_out1, lstm_params.cell_to_output_weights(), &_output3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
286  _accum_output1.configure(&_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
287  _output4.allocator()->allocate();
288  output_gate_out = &_output1;
289 
290  // Allocate intermediate buffers
291  _output3.allocator()->allocate();
292  }
293  else
294  {
295  _output1.allocator()->allocate();
296  }
297  if(_is_layer_norm_lstm)
298  {
299  _output_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
300  _output_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
301  _memory_group.manage(&_output_layer_norm_out1);
302  _memory_group.manage(&_output_layer_norm_out2);
303  _mean_std_norm_output_gate.configure(output_gate_out);
304  _pixelwise_mul_output_gate_coeff.configure(output_gate_out, lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
305  // output_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
306  output_gate_out->allocator()->allocate();
307  _accum_output_gate_bias.configure(&_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
308  _output_layer_norm_out1.allocator()->allocate();
309  output_gate_out = &_output_layer_norm_out2;
310  }
311  _activation_output.configure(output_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
312 
313  // Configure block that calculates the output state
314  /** lstm_res = PixelwiseMul(output, Activation(cell_state))
315  *
316  * -- Clip(lstm_res * projection_weights + projection_bias, projection_threshold) , if there is a projection
317  * /
318  * output_state = --
319  * \
320  * -- lstm_res , otherwise
321  */
322  ITensor *output_state_out_tmp = lstm_params.has_projection() ? &_output_state1 : output_state_out;
323  _cell_state_activation.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
324  _output_state1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
325 
326  _memory_group.manage(&_cell_state_activation);
327  _activation_output_state.configure(&_cell_state_out1, &_cell_state_activation, activation_info);
328  _pixelwise_mul_output_state2.configure(&_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
329  _cell_state_activation.allocator()->allocate();
330  output_gate_out->allocator()->allocate();
331 
332  if(lstm_params.has_projection())
333  {
334  _has_projection_weights = true;
335  _fully_connected_output_state.configure(output_state_out_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out);
336  _output_state1.allocator()->allocate();
337  // Perform clipping
338  if(projection_threshold != 0.f)
339  {
340  _perform_projection_clipping = true;
341  _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
342  }
343  }
344 
345  // Copy cell state and output
346  _copy_cell_state.configure(&_cell_state_out1, cell_state_out);
347  _copy_output.configure(output_state_out, output);
348 
349  // Vector for holding the tensors to store in scratch buffer
350  std::vector<const ITensor *> scratch_inputs;
351  if(!lstm_params.has_cifg_opt())
352  {
353  scratch_inputs.emplace_back(input_gate_out);
354  }
355  scratch_inputs.emplace_back(&_cell_state_out1);
356  scratch_inputs.emplace_back(forget_gate_out);
357  scratch_inputs.emplace_back(output_gate_out);
358  _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX);
359  input_gate_out->allocator()->allocate();
360  _cell_state_out1.allocator()->allocate();
361  forget_gate_out->allocator()->allocate();
362  output_gate_out->allocator()->allocate();
363 }
364 
365 Status NELSTMLayer::validate(const ITensorInfo *input,
366  const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
367  const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
368  const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
369  const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
370  const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
371  const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
372 {
373  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input,
374  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
375  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
376  forget_gate_bias, cell_bias, output_gate_bias,
377  output_state_in, cell_state_in,
378  scratch_buffer, output_state_out, cell_state_out, output);
379 
380  // Check data types
381  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
382  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input,
383  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
384  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
385  forget_gate_bias, cell_bias, output_gate_bias,
386  output_state_in, cell_state_in,
387  scratch_buffer, output_state_out, cell_state_out, output);
388 
389  // Check dimensions
390  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
391  ARM_COMPUTE_RETURN_ERROR_ON(input_to_forget_weights->num_dimensions() > 2);
392  ARM_COMPUTE_RETURN_ERROR_ON(input_to_cell_weights->num_dimensions() > 2);
393  ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() > 2);
394  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_forget_weights->num_dimensions() > 2);
395  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_cell_weights->num_dimensions() > 2);
396  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() > 2);
397  ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() > 1);
398  ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->num_dimensions() > 1);
399  ARM_COMPUTE_RETURN_ERROR_ON(output_gate_bias->num_dimensions() > 1);
400  ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
401  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() > 2);
402  ARM_COMPUTE_RETURN_ERROR_ON(scratch_buffer->num_dimensions() > 2);
403  ARM_COMPUTE_RETURN_ERROR_ON(output_state_out->num_dimensions() > 2);
404  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_out->num_dimensions() > 2);
405  ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
406  ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->dimension(0) * 4 != scratch_buffer->dimension(0)
407  && cell_bias->dimension(0) * 3 != scratch_buffer->dimension(0));
408 
409  const unsigned int num_batches = input->dimension(1);
410  const unsigned int num_cells = input_to_output_weights->dimension(1);
411 
412  if(lstm_params.use_layer_norm())
413  {
414  // If CIFG is used, input layer normalization weights tensor is omitted
415  if(lstm_params.has_cifg_opt())
416  {
417  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights() != nullptr);
418  }
419  else
420  {
422  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->num_dimensions() > 1);
423  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->dimension(0) != num_cells);
425  }
426 
429  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->num_dimensions() > 1);
430  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->num_dimensions() > 1);
431  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->num_dimensions() > 1);
432  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->dimension(0) != num_cells);
433  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->dimension(0) != num_cells);
434  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->dimension(0) != num_cells);
435  }
436 
437  // Check peephole optimization
438  if(lstm_params.has_peephole_opt())
439  {
441  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() > 1);
442  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_output_weights()->num_dimensions() > 1);
443  }
444 
445  TensorShape units_out_transposed_shape = compute_transposed_shape(*recurrent_to_output_weights);
446  TensorShape num_units_transposed_shape = compute_transposed_shape(*forget_gate_bias);
447  const TensorInfo units_out_transposed_info = TensorInfo(units_out_transposed_shape, 1, input->data_type());
448  const TensorInfo num_units_transposed_info = TensorInfo(num_units_transposed_shape, 1, input->data_type());
449 
450  TensorInfo input_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
451  TensorInfo forget_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
452  TensorInfo output_gate_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
453  TensorInfo cell_state_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
454 
455  std::vector<const ITensorInfo *> inputs_vector;
456  inputs_vector.emplace_back(input);
457  inputs_vector.emplace_back(output_state_in);
458  TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
459  TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type());
460  ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX));
461 
462  // Validate forget gate
463  ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_forget_weights, (lstm_params.use_layer_norm()) ? nullptr : forget_gate_bias, &forget_gate));
464 
465  if(lstm_params.has_peephole_opt())
466  {
468  ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
469  }
470  if(lstm_params.use_layer_norm())
471  {
476  }
478 
479  // Validate input gate
480  if(!lstm_params.has_cifg_opt())
481  {
483  lstm_params.recurrent_to_input_weights(),
484  lstm_params.input_gate_bias());
485  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_to_input_weights()->num_dimensions() > 2);
486  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.recurrent_to_input_weights()->num_dimensions() > 2);
487  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_gate_bias()->num_dimensions() > 1);
488 
489  std::vector<const ITensorInfo *> lstm_weights;
490  lstm_weights.emplace_back(lstm_params.input_to_input_weights());
491  lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
492  TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
493  TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type());
494  ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX));
495  ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), (lstm_params.use_layer_norm()) ? nullptr : lstm_params.input_gate_bias(), &input_gate));
496 
497  if(lstm_params.has_peephole_opt())
498  {
500  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_input_weights()->num_dimensions() > 1);
503  }
504 
505  if(lstm_params.use_layer_norm())
506  {
510  }
512  }
513  else
514  {
516  }
517 
518  // Validate cell state
519  ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_cell_weights, (lstm_params.use_layer_norm()) ? nullptr : cell_bias, &cell_state_tmp));
520  ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(output_state_in, &units_out_transposed_info, nullptr, &cell_state_tmp, 1.f, 0.f, GEMMInfo()));
521  ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
522  if(lstm_params.use_layer_norm())
523  {
527  ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE));
528  }
529  ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_state_tmp, nullptr, activation_info));
532  ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
533  if(cell_threshold != 0.f)
534  {
536  cell_threshold)));
537  }
538 
539  // Validate output gate tmp
540  std::vector<const ITensorInfo *> in_out_weights;
541  in_out_weights.emplace_back(input_to_output_weights);
542  in_out_weights.emplace_back(recurrent_to_output_weights);
543  TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
544  TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type());
545  ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX));
546 
547  ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_output_weights, (lstm_params.use_layer_norm()) ? nullptr : output_gate_bias, &output_gate_tmp));
548 
549  if(lstm_params.has_peephole_opt())
550  {
553  ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_gate_tmp, &output_gate_tmp, &output_gate_tmp, ConvertPolicy::SATURATE));
554  }
555  if(lstm_params.use_layer_norm())
556  {
561  }
563 
564  // Validate output state
565  ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
567  if(lstm_params.has_projection())
568  {
569  ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out));
570  if(projection_threshold != 0.f)
571  {
572  ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_state_out, output_state_out,
573  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)));
574  }
575  }
576 
577  // Validate copy kernel
578  ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out));
579  ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
580 
581  // Validate scratch concatenation
582  std::vector<const ITensorInfo *> inputs_vector_info_raw;
583  if(!lstm_params.has_cifg_opt())
584  {
585  inputs_vector_info_raw.push_back(&input_gate);
586  }
587  inputs_vector_info_raw.push_back(&cell_state_tmp);
588  inputs_vector_info_raw.push_back(&forget_gate);
589  inputs_vector_info_raw.push_back(&output_gate_tmp);
590 
591  ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
592  return Status{};
593 }
594 
595 void NELSTMLayer::run()
596 {
597  prepare();
598 
599  MemoryGroupResourceScope scope_mg(_memory_group);
600 
601  _concat_inputs_forget_gate.run();
602  _fully_connected_forget_gate.run();
603 
604  if(_run_peephole_opt)
605  {
606  _pixelwise_mul_forget_gate.run();
607  _accum_forget_gate1.run();
608  }
609  if(_is_layer_norm_lstm)
610  {
611  _mean_std_norm_forget_gate.run();
612  _pixelwise_mul_forget_gate_coeff.run();
613  _accum_forget_gate_bias.run();
614  }
615  _activation_forget_gate.run();
616 
617  if(_run_cifg_opt)
618  {
619  if(_ones.info()->data_type() == DataType::F16)
620  {
621  std::fill_n(reinterpret_cast<half *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 1);
622  }
623  else
624  {
625  std::fill_n(reinterpret_cast<float *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 1);
626  }
627  _subtract_input_gate.run();
628  }
629  else
630  {
631  _fully_connected_input_gate.run();
632 
633  if(_run_peephole_opt)
634  {
635  _pixelwise_mul_input_gate.run();
636  _accum_input_gate1.run();
637  }
638 
639  if(_is_layer_norm_lstm)
640  {
641  _mean_std_norm_input_gate.run();
642  _pixelwise_mul_input_gate_coeff.run();
643  _accum_input_gate_bias.run();
644  }
645  _activation_input_gate.run();
646  }
647 
648  _fully_connected_cell_state.run();
649  NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY);
650  _gemm_cell_state1.run();
651  _accum_cell_state1.run();
652  if(_is_layer_norm_lstm)
653  {
654  _mean_std_norm_cell_gate.run();
655  _pixelwise_mul_cell_gate_coeff.run();
656  _accum_cell_gate_bias.run();
657  }
658  _activation_cell_state.run();
659  _pixelwise_mul_cell_state1.run();
660  _pixelwise_mul_cell_state2.run();
661  _accum_cell_state2.run();
662 
663  if(_perform_cell_clipping)
664  {
665  _cell_clip.run();
666  }
667 
668  _fully_connected_output.run();
669  if(_run_peephole_opt)
670  {
671  _pixelwise_mul_output_state1.run();
672  _accum_output1.run();
673  }
674  if(_is_layer_norm_lstm)
675  {
676  _mean_std_norm_output_gate.run();
677  _pixelwise_mul_output_gate_coeff.run();
678  _accum_output_gate_bias.run();
679  }
680  _activation_output.run();
681 
682  _activation_output_state.run();
683  _pixelwise_mul_output_state2.run();
684 
685  if(_has_projection_weights)
686  {
687  _fully_connected_output_state.run();
688  if(_perform_projection_clipping)
689  {
690  _projection_clip.run();
691  }
692  }
693 
694  NEScheduler::get().schedule(&_copy_cell_state, Window::DimY);
695  NEScheduler::get().schedule(&_copy_output, Window::DimY);
696 
697  _concat_scratch_buffer.run();
698 }
699 
700 void NELSTMLayer::prepare()
701 {
702  if(!_is_prepared)
703  {
704  _concat_weights_forget_gate.run();
705  if(!_run_cifg_opt)
706  {
707  _concat_weights_input_gate.run();
708  }
709  _concat_weights_output.run();
710  _is_prepared = true;
711  }
712 }
713 } // namespace arm_compute
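
The listing above only shows how NELSTMLayer wires its internal NEON functions. As a reading aid, the sketch below illustrates how a caller might drive the layer for a single LSTM step. It is a minimal sketch under assumed conditions: the tensor names, the toy dimensions (num_batches, num_inputs, num_cells) and the allocation order are illustrative choices, not part of this file; only the NELSTMLayer constructor, configure() and run() calls and the LSTMParams / ActivationLayerInfo types come from the code above. CIFG, peephole, projection and layer normalisation are left disabled, so a default-constructed LSTMParams is used.

#include <initializer_list>

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NELSTMLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/common/LSTMParams.h"

using namespace arm_compute;

int main()
{
    // Toy dimensions, chosen only for illustration.
    const unsigned int num_batches = 1;
    const unsigned int num_inputs  = 2;
    const unsigned int num_cells   = 4;

    // Give every tensor an F32 TensorInfo of the requested shape.
    auto init = [](Tensor &t, const TensorShape &shape)
    {
        t.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    };

    Tensor input, i2f_w, i2c_w, i2o_w, r2f_w, r2c_w, r2o_w;
    Tensor forget_bias, cell_bias, output_bias;
    Tensor output_state_in, cell_state_in;
    Tensor scratch, output_state_out, cell_state_out, output;

    init(input,            TensorShape(num_inputs, num_batches));
    init(i2f_w,            TensorShape(num_inputs, num_cells));
    init(i2c_w,            TensorShape(num_inputs, num_cells));
    init(i2o_w,            TensorShape(num_inputs, num_cells));
    init(r2f_w,            TensorShape(num_cells, num_cells));
    init(r2c_w,            TensorShape(num_cells, num_cells));
    init(r2o_w,            TensorShape(num_cells, num_cells));
    init(forget_bias,      TensorShape(num_cells));
    init(cell_bias,        TensorShape(num_cells));
    init(output_bias,      TensorShape(num_cells));
    init(output_state_in,  TensorShape(num_cells, num_batches));
    init(cell_state_in,    TensorShape(num_cells, num_batches));
    init(scratch,          TensorShape(num_cells * 4, num_batches)); // 4 * num_cells without CIFG
    init(output_state_out, TensorShape(num_cells, num_batches));
    init(cell_state_out,   TensorShape(num_cells, num_batches));
    init(output,           TensorShape(num_cells, num_batches));

    // CIFG, peephole, projection and layer normalisation are all left disabled,
    // so a default-constructed LSTMParams is sufficient.
    LSTMParams<ITensor> lstm_params;

    NELSTMLayer lstm;
    lstm.configure(&input, &i2f_w, &i2c_w, &i2o_w,
                   &r2f_w, &r2c_w, &r2o_w,
                   &forget_bias, &cell_bias, &output_bias,
                   &output_state_in, &cell_state_in,
                   &scratch, &output_state_out, &cell_state_out, &output,
                   lstm_params, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));

    // Backing memory is allocated only after configure(); a real caller would now
    // fill the weights, biases and input/state tensors before running the step.
    for(Tensor *t : { &input, &i2f_w, &i2c_w, &i2o_w, &r2f_w, &r2c_w, &r2o_w,
                      &forget_bias, &cell_bias, &output_bias,
                      &output_state_in, &cell_state_in,
                      &scratch, &output_state_out, &cell_state_out, &output })
    {
        t->allocator()->allocate();
    }

    lstm.run();
    return 0;
}

The assumed scratch-buffer width follows the check in validate() above: 4 * num_cells when the input gate is present, and 3 * num_cells when the CIFG optimization removes it.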