Arm Compute Library 21.02 — NEDirectConvolutionLayerOutputStageKernel.cpp (annotated source listing; see the documentation of this file).
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Types.h"
34 #include "src/core/CPP/Validate.h"
35 #include "src/core/NEON/NEAsymm.h"
40 
41 #include <arm_neon.h>
42 #include <cstddef>
43 #include <cstdint>
44 
45 namespace arm_compute
46 {
47 namespace
48 {
// Validate the combination of input/bias/output tensors and stage info.
// Returns an error Status on the first failed check, Status{} on success.
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output,
                          const DirectConvolutionLayerOutputStageKernelInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);

    if(bias != nullptr)
    {
        // Bias must be 1D and match the input's channel dimension (layout-aware lookup).
        ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)));
        ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
    }

    if(input->data_type() == DataType::S32)
    {
        // S32 input means a quantized output stage, which needs a distinct output tensor.
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output == nullptr, "In-place computation not allowed for quantized output");
    }

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        if(is_data_type_float(input->data_type()))
        {
            // NOTE(review): float-path data-type/shape checks appear elided in this listing — confirm against the full source.
        }
        else
        {
            // NOTE(review): quantized-path checks appear elided in this listing — confirm against the full source.
        }
    }
    else if(input->data_type() == DataType::S32)
    {
        // In case of quantized computation and unconfigured output, the output data type must be provided through DirectConvolutionLayerOutputStageKernelInfo
        ARM_COMPUTE_RETURN_ERROR_ON((info.output_data_type != DataType::QASYMM8) && (info.output_data_type != DataType::QASYMM8_SIGNED));
    }

    return Status{};
}
90 
/** Float output stage for NCHW: copies the accumulator to the output,
 *  adding an optional per-channel bias (broadcast along X).
 *
 *  The fixed-point parameters are accepted to match the quantized overload's
 *  signature and are unused here.
 */
template <typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
                  int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
    /** Neon vector tag type. */
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

    ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier, result_shift, result_offset_after_shift);

    const int start_x = window.x().start();
    const int end_x   = window.x().end();
    const int step_x  = 16 / input->info()->element_size();

    // Collapse X: we advance along X manually inside the loop body.
    Window win = window;
    win.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in_it(input, win);
    Iterator out_it(output, win);
    execute_window_loop(win, [&](const Coordinates & id)
    {
        const auto src = reinterpret_cast<const T *>(in_it.ptr());
        const auto dst = reinterpret_cast<T *>(out_it.ptr());

        int x = start_x;
        // Vectorized body
        for(; x <= (end_x - step_x); x += step_x)
        {
            auto acc = wrapper::vloadq(src + x);

            if(has_bias)
            {
                // Bias is indexed by the channel coordinate (Z in NCHW).
                const auto vb = wrapper::vdup_n(*reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z()))), ExactTagType{});
                acc           = wrapper::vadd(acc, vb);
            }

            wrapper::vstore(dst + x, acc);
        }

        // Scalar tail
        for(; x < end_x; ++x)
        {
            T value = src[x];

            if(has_bias)
            {
                value += *reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z())));
            }

            dst[x] = value;
        }
    },
    in_it, out_it);
}
151 
/** Float output stage for NHWC: copies the accumulator to the output,
 *  adding an optional per-channel bias read element-wise along X.
 *
 *  The fixed-point parameters are accepted to match the quantized overload's
 *  signature and are unused here.
 */
template <typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
                  int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
    ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier, result_shift, result_offset_after_shift);

    // The bias window only walks the channel (X in NHWC) dimension.
    Window window_bias = window;
    window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
    window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
    window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
    window_bias.set(3, Window::Dimension(0, 0, 0));

    const int start_x = window.x().start();
    const int end_x   = window.x().end();
    const int step_x  = 16 / input->info()->element_size();

    // Collapse X: we advance along X manually inside the loop body.
    Window win = window;
    win.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in_it(input, win);
    Iterator bias_it(bias, window_bias);
    Iterator out_it(output, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto src = reinterpret_cast<const T *>(in_it.ptr());
        const auto dst = reinterpret_cast<T *>(out_it.ptr());

        int x = start_x;
        // Vectorized body
        for(; x <= (end_x - step_x); x += step_x)
        {
            auto acc = wrapper::vloadq(src + x);

            if(has_bias)
            {
                acc = wrapper::vadd(acc, wrapper::vloadq(reinterpret_cast<T *>(bias_it.ptr()) + x));
            }

            wrapper::vstore(dst + x, acc);
        }

        // Scalar tail
        for(; x < end_x; ++x)
        {
            T value = src[x];

            if(has_bias)
            {
                value += *(reinterpret_cast<T *>(bias_it.ptr()) + x);
            }

            dst[x] = value;
        }
    },
    in_it, bias_it, out_it);
}
216 
217 // Quantized case
// Quantized case: requantize S32 accumulators to an 8-bit output (NCHW),
// accumulating an optional per-channel bias first.
template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
                       int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
    using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
    using TagType    = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;

    // Broadcast requantization offset and saturation bounds once.
    const int32x4_t  result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
    const VectorType min                           = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
    const VectorType max                           = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});

    const int start_x = window.x().start();
    const int end_x   = window.x().end();
    const int step_x  = 16 / input->info()->element_size();

    // Collapse X: we advance along X manually inside the loop body.
    Window win = window;
    win.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in_it(input, win);
    Iterator out_it(output, win);

    execute_window_loop(win, [&](const Coordinates & id)
    {
        const auto src = reinterpret_cast<const int32_t *>(in_it.ptr());
        const auto dst = reinterpret_cast<TOut *>(out_it.ptr());

        int x = start_x;
        // Vectorized body: 16 S32 accumulators per iteration.
        for(; x <= (end_x - step_x); x += step_x)
        {
            int32x4x4_t acc =
            {
                {
                    wrapper::vloadq(src + x),
                    wrapper::vloadq(src + x + 4),
                    wrapper::vloadq(src + x + 8),
                    wrapper::vloadq(src + x + 12)
                }
            };

            if(has_bias)
            {
                // Per-channel bias, indexed by the Z (channel) coordinate in NCHW.
                const auto vb = wrapper::vdup_n(*reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z()))), TagType{});
                acc.val[0]    = wrapper::vadd(acc.val[0], vb);
                acc.val[1]    = wrapper::vadd(acc.val[1], vb);
                acc.val[2]    = wrapper::vadd(acc.val[2], vb);
                acc.val[3]    = wrapper::vadd(acc.val[3], vb);
            }

            wrapper::vstore(dst + x, finalize_quantization(acc, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32,
                                                           min, max, false));
        }

        // Scalar tail
        for(; x < end_x; ++x)
        {
            int32_t acc = src[x];

            if(has_bias)
            {
                acc += *reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z())));
            }

            dst[x] = finalize_quantization(acc, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
                                           std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
        }
    },
    in_it, out_it);
}
/** Quantized output stage for NHWC: requantizes S32 accumulators to an 8-bit
 *  output type, accumulating an optional per-channel bias first.
 *
 *  The bias is read element-wise along X (the channel dimension in NHWC).
 */
template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
                       int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
    using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
    using TagType    = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;

    const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);

    // Saturation bounds of the output type, broadcast to full vectors.
    const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
    const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});

    // The bias window only walks the channel (X in NHWC) dimension.
    Window window_bias = window;
    window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
    window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
    window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
    window_bias.set(3, Window::Dimension(0, 0, 0));

    const int window_start_x = window.x().start();
    const int window_end_x   = window.x().end();
    const int window_step_x  = 16 / input->info()->element_size();
    Window    win            = window;
    win.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in(input, win);
    Iterator bi(bias, window_bias);
    Iterator out(output, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            // Get bias and pointer to input
            const auto  in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
            int32x4x4_t v_in =
            {
                {
                    wrapper::vloadq(in_ptr),
                    wrapper::vloadq(in_ptr + 4),
                    wrapper::vloadq(in_ptr + 8),
                    wrapper::vloadq(in_ptr + 12),
                }
            };

            // Accumulate bias
            if(has_bias)
            {
                const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;

                // BUGFIX: wrapper::vadd returns the sum by value; previously the
                // results were discarded, so the bias was never accumulated on the
                // vectorized path (while the scalar tail below did accumulate it).
                v_in.val[0] = wrapper::vadd(v_in.val[0], wrapper::vloadq(bias_ptr));
                v_in.val[1] = wrapper::vadd(v_in.val[1], wrapper::vloadq(bias_ptr + 4));
                v_in.val[2] = wrapper::vadd(v_in.val[2], wrapper::vloadq(bias_ptr + 8));
                v_in.val[3] = wrapper::vadd(v_in.val[3], wrapper::vloadq(bias_ptr + 12));
            }

            const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
            wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32, min, max, false));
        }

        // Left-overs loop
        for(; x < window_end_x; ++x)
        {
            // Get bias and pointer to input
            const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
            int32_t    s_in   = *in_ptr;

            // Accumulate bias
            if(has_bias)
            {
                const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
                s_in += *bias_ptr;
            }

            const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
            *out_ptr           = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
                                                       std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
        }
    },
    in, bi, out);
}
378 } // namespace
379 
// Default construction: no stage function selected and all state zeroed;
// configure() must be called before run().
    : _func(nullptr), _input(nullptr), _bias(nullptr), _output(nullptr), _result_fixedpoint_multiplier(0), _result_shift(0), _result_offset_after_shift(0)
{
}
384 
{
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info(), info));

    _func = nullptr;
    _bias = bias;
    _input = input;
    // When no output tensor is provided the stage is computed in place on the input.
    _output = (output != nullptr) ? output : input;
    _result_fixedpoint_multiplier = info.result_fixedpoint_multiplier;
    _result_shift = info.result_shift;
    _result_offset_after_shift = info.result_offset_after_shift;

    // Auto-initialize output tensor if required
    if(output != nullptr && output->info() != nullptr)
    {
        // Work out expected output data type
        const DataType output_dt = (input->info()->data_type() == DataType::S32) ? info.output_data_type : DataType::S32;
        // Output tensor auto initialization if not yet initialized
        auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_dt));
    }

    // Execution window covers the whole input; no border/steps needed.
    Window win = calculate_max_window(*input->info(), Steps());
    Coordinates coord;
    coord.set_num_dimensions(input->info()->num_dimensions());

    // Mark the full tensor shape valid on whichever tensor receives the result.
    if(output != nullptr && (output->info()->total_size() != 0))
    {
        output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
    }
    else
    {
        input->info()->set_valid_region(ValidRegion(coord, input->info()->tensor_shape()));
    }

    INEKernel::configure(win);

    const bool is_qasymm8_signed = (output != nullptr) ? is_data_type_quantized_asymmetric_signed(output->info()->data_type()) : false;

    // Set appropriate function: dispatch on data layout, then on data type.
    // S32 input selects the quantized (requantizing) path; the output signedness
    // picks int8_t vs uint8_t.
    if(input->info()->data_layout() == DataLayout::NCHW)
    {
        switch(input->info()->data_type())
        {
            case DataType::S32:
            {
                if(is_qasymm8_signed)
                {
                    _func = &output_stage_nchw<int8_t>;
                }
                else
                {
                    _func = &output_stage_nchw<uint8_t>;
                }
                break;
            }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
            {
                _func = &output_stage_nchw<float16_t>;
                break;
            }
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::F32:
            {
                _func = &output_stage_nchw<float>;
                break;
            }
            default:
            {
                ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
            }
        }
    }
    else
    {
        switch(input->info()->data_type())
        {
            case DataType::S32:
            {
                if(is_qasymm8_signed)
                {
                    _func = &output_stage_nhwc<int8_t>;
                }
                else
                {
                    _func = &output_stage_nhwc<uint8_t>;
                }
                break;
            }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
            {
                _func = &output_stage_nhwc<float16_t>;
                break;
            }
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::F32:
            {
                _func = &output_stage_nhwc<float>;
                break;
            }
            default:
            {
                ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
            }
        }
    }
}
496 
{
    // Static validation entry point: forwards to the same argument validator
    // used by configure(), without modifying any state.
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, info));

    return Status{};
}
504 
{
    ARM_COMPUTE_UNUSED(info);
    // configure() must have selected a stage function before run() is invoked.
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    const bool has_bias = _bias != nullptr;
    (*_func)(_input, _bias, window, _output, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, has_bias);
}
515 } // namespace arm_compute
int32_t result_fixedpoint_multiplier
Result output stage multiplier used for quantizing.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:108
int32_t result_offset_after_shift
Result offset used for quantizing.
SimpleTensor< float > b
Definition: DFT.cpp:157
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
uint8x16_t vloadq(const uint8_t *ptr)
Definition: load.h:58
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:39
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
Definition: ITensor.h:36
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Copyright (c) 2017-2021 Arm Limited.
virtual void set_valid_region(const ValidRegion &valid_region)=0
Set the valid region of the tensor.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
1 channel, 1 S32 per channel
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
void configure(ITensor *input, const ITensor *bias=nullptr, ITensor *output=nullptr, const DirectConvolutionLayerOutputStageKernelInfo &info=DirectConvolutionLayerOutputStageKernelInfo())
Set the accumulate buffer and the biases of the kernel.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Coordinates of an item.
Definition: Coordinates.h:37
bool is_data_type_quantized_asymmetric_signed(DataType dt)
Check if a given data type is of asymmetric quantized signed type.
Definition: Utils.h:1209
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
Num samples, channels, height, width.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias=nullptr, const ITensorInfo *output=nullptr, const DirectConvolutionLayerOutputStageKernelInfo &info=DirectConvolutionLayerOutputStageKernelInfo())
Static function to check if given info will lead to a valid configuration of NEDirectConvolutionLayer...
Information about executing thread and CPU.
Definition: CPPTypes.h:235
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:443
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
DataType output_data_type
Output tensor data type to use if the output is not initialized.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
void vstore(uint8_t *ptr, uint8x8_t val)
Definition: store.h:39
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
Definition: dup_n.h:41
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
void set_num_dimensions(size_t num_dimensions)
Set number of dimensions.
Definition: Dimensions.h:149
quantized, asymmetric fixed-point 8-bit number signed
Includes all wrapper headers at once.
Container for valid region of a window.
Definition: Types.h:188
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
DataType
Available data types.
Definition: Types.h:77
Describe a multidimensional execution window.
Definition: Window.h:39
wrapper::traits::neon_vector< T, 16 >::type finalize_quantization(int32x4x4_t &in_s32, int32x4_t result_shift_s32, typename wrapper::traits::neon_vector< T, 16 >::type min, typename wrapper::traits::neon_vector< T, 16 >::type max)
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:1148
Descriptor used by the direct convolution layer output stage kernels.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
int32_t result_shift
Result output stage shift used for quantizing.