46 using namespace misc::shape_calculator;
102 return std::make_pair(
Status{}, win);
/** Writes one convolution patch into a linear output buffer, reading the input through byte strides.
 *
 * NOTE: this listing omits several original lines (braces, most of the parameter list,
 * some statements); the comments below describe only what is visible.
 *
 * @tparam T        Element type read from the input and written through out_ptr.
 * @tparam has_pads When true, coordinates outside [0, input_w) x [0, input_h) receive pad_value
 *                  instead of being read from the input.
 *
 * in_ptr is a byte pointer: element (x, y, d) is located at
 * d * input_stride_z + y * input_stride_y + x * input_stride_x bytes from it.
 */
105 template <
typename T,
bool has_pads>
106 inline void linearize_volume_nchw(
const uint8_t *
const in_ptr,
// Number of elements in one kernel plane; used below as the output distance between consecutive channels.
123 const int kernel_size2 = kernel_width * kernel_height;
// Exclusive end coordinates of the (dilated) patch along x and y.
124 const int x_e = top_left_x + kernel_width * dilation_x;
125 const int y_e = top_left_y + kernel_height * dilation_y;
// Main loop: three channels (d, d + 1, d + 2) are handled per iteration.
132 for(; d <= (kernel_depth - 3); d += 3)
134 for(
int y = top_left_y; y < y_e; y += dilation_y)
// Row lies outside the input: write pad_value for every column, in all three channel planes.
136 if((y < 0 || y >= input_h) && has_pads)
139 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
141 *(out_ptr + 0 * kernel_size2) = pad_value;
142 *(out_ptr + 1 * kernel_size2) = pad_value;
143 *(out_ptr + 2 * kernel_size2) = pad_value;
148 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
// Column lies outside the input: pad this single position in all three channel planes.
150 if((x < 0 || x >= input_w) && has_pads)
152 *(out_ptr + 0 * kernel_size2) = pad_value;
153 *(out_ptr + 1 * kernel_size2) = pad_value;
154 *(out_ptr + 2 * kernel_size2) = pad_value;
// Otherwise read one element per channel; the three writes land kernel_size2 elements apart.
158 *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) *
input_stride_z + y *
input_stride_y + x * input_stride_x)));
159 *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) *
input_stride_z + y *
input_stride_y + x * input_stride_x)));
160 *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) *
input_stride_z + y *
input_stride_y + x * input_stride_x)));
// The x loops advance out_ptr by one element per position (++out_ptr); this jump adds two more
// planes, presumably to step past the channels already written through the +1/+2 offsets
// (the enclosing scope is not visible in this listing).
165 out_ptr += 2 * kernel_size2;
// Remainder loop: leftover channels are processed one at a time.
169 for(; d < kernel_depth; d++)
171 for(
int y = top_left_y; y < y_e; y += dilation_y)
173 if((y < 0 || y >= input_h) && has_pads)
// NOTE(review): memset fills every BYTE with the low byte of pad_value, unlike the element-wise
// assignments used elsewhere in this function. The two agree only when sizeof(T) == 1 or
// pad_value == 0 -- confirm that no multi-byte T is used with a non-zero pad_value.
176 memset(static_cast<void *>(out_ptr), pad_value, kernel_width *
sizeof(T));
177 out_ptr += kernel_width;
181 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
183 if((x < 0 || x >= input_w) && has_pads)
185 *out_ptr = pad_value;
// Writes a trailing element equal to 1 -- presumably the bias slot, guarded by a condition
// that is not shown in this listing; TODO confirm.
199 *out_ptr = static_cast<T>(1);
/** Writes one convolution patch into a linear output buffer, copying whole runs of
 * kernel_width * input_c elements with memcpy where possible.
 *
 * NOTE: this listing omits several original lines (braces, most of the parameter list,
 * some statements); the comments below describe only what is visible.
 *
 * @tparam T        Element type; its size (element_size) scales every memcpy/memset length.
 * @tparam has_pads Declared here, but not referenced in any line visible in this listing.
 *
 * in_ptr is a byte pointer: row y is addressed with input_stride_z and column x with
 * input_stride_y.
 */
203 template <
typename T,
bool has_pads>
204 inline void linearize_volume_nhwc(
const uint8_t *
const in_ptr,
// Exclusive end coordinates of the (dilated) patch along x and y.
220 const int end_x = start_x + kernel_width * dilation_x;
221 const int end_y = start_y + kernel_height * dilation_y;
// Number of elements in one kernel row (kernel_width positions times input_c channels).
222 const int pad_quant = kernel_width * input_c;
223 const int element_size = static_cast<int>(
sizeof(T));
// Fast path: patch inside the input, no horizontal dilation, and consecutive x positions exactly
// input_c elements apart, so each kernel row is a single memcpy.
// NOTE: the upper bounds use strict '<', so a patch whose end equals input_h / input_w does not
// take this path.
224 if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (
input_stride_y == input_c * element_size))
226 for(
int y = start_y; y < end_y; y += dilation_y)
229 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y *
input_stride_z + start_x *
input_stride_y)), input_c * kernel_width * element_size);
230 out_ptr += input_c * kernel_width;
// General path (presumably the else branch of the check above; the 'else' itself is not shown).
235 for(
int y = start_y; y < end_y; y += dilation_y)
// Row lies outside the input: fill the whole kernel row with padding.
237 if(y < 0 || y >= input_h)
// NOTE(review): memset fills bytes with the low byte of pad_value; this equals an element-wise
// pad only when element_size == 1 or pad_value == 0 -- confirm for multi-byte T.
239 memset(static_cast<void *>(out_ptr), pad_value, pad_quant * element_size);
240 out_ptr += pad_quant;
// Row needs per-position handling: dilated in x, crossing the left/right border, or with a
// stride that does not match input_c elements.
242 else if(dilation_x > 1 || start_x < 0 || end_x >= input_w ||
input_stride_y != input_c * element_size)
244 for(
int x = start_x; x < end_x; x += dilation_x)
246 if(x < 0 || x >= input_w)
// Position outside the input: pad input_c elements (same byte-wise memset caveat as above).
248 memset(static_cast<void *>(out_ptr), pad_value, input_c * element_size);
// Remaining case: the row is copied in one memcpy starting at start_x.
261 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y *
input_stride_z + start_x *
input_stride_y)), input_c * kernel_width * element_size);
262 out_ptr += input_c * kernel_width;
// Writes a trailing element equal to 1 -- presumably the bias slot, guarded by a condition
// that is not shown in this listing; TODO confirm.
269 *out_ptr = static_cast<T>(1);
/** Runs im2col over the given window, calling linearize_volume_nchw or linearize_volume_nhwc
 * for each window element.
 *
 * NOTE: this listing omits several original lines (braces, index lookups, the window loop
 * header and the call arguments); the comments below describe only what is visible.
 *
 * @tparam T        Element type forwarded to the linearize_volume_* helpers.
 * @tparam has_pads Forwarded to the linearize_volume_* helpers.
 * @tparam is_nchw  Presumably selects between the two helpers; the branch is not shown here.
 *
 * @param[in] window Execution window; it is copied into window_in_out before use.
 */
274 template <
typename T,
bool has_pads,
bool is_nchw>
275 void NEIm2ColKernel::run_im2col(
const Window &window)
// Input geometry, looked up through layout-dependent dimension indices.
284 const int input_w = _input->info()->dimension(width_idx);
285 const int input_h = _input->info()->dimension(height_idx);
286 const int input_c = _input->info()->dimension(channel_idx);
// Byte strides of the input's first three dimensions.
287 const int input_stride_x = _input->info()->strides_in_bytes().x();
288 const int input_stride_y = _input->info()->strides_in_bytes().y();
289 const int input_stride_z = _input->info()->strides_in_bytes().z();
290 const int pad_left = _conv_info.pad_left();
291 const int pad_top = _conv_info.pad_top();
292 const int stride_x = _conv_info.stride().first;
293 const int stride_y = _conv_info.stride().second;
// Padding value: the uniform quantization offset for quantized inputs, 0 otherwise.
294 const int pad_value =
is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0;
296 Window window_in_out(window);
304 Iterator out(_output, window_in_out);
// Top-left corner of the patch in input coordinates: window index times the convolution stride,
// shifted back by the left/top padding (may be negative).
308 const int start_w =
id[width_idx] * stride_x - pad_left;
309 const int start_h =
id[height_idx] * stride_y - pad_top;
312 const uint8_t *
const input_ptr = in.ptr();
// Output location for this patch: offset by (id[width_idx] + id[height_idx] * _convolved_dims.first)
// times the output's y stride in bytes.
313 auto output_ptr = reinterpret_cast<T *>(out.ptr() + (
id[width_idx] +
id[height_idx] * _convolved_dims.first) * _output->
info()->
strides_in_bytes().
y());
// Layout-specific linearization; the remaining call arguments are not shown in this listing.
318 linearize_volume_nchw<T, has_pads>(input_ptr,
337 linearize_volume_nhwc<T, has_pads>(input_ptr,
// Member initializer list (the constructor header line is not part of this listing):
// pointers start as nullptr, kernel sizes as 0, _has_bias as false, _dilation as (1U, 1U)
// and _data_layout as DataLayout::UNKNOWN.
358 : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1
U, 1
U), _data_layout(
DataLayout::
UNKNOWN)
// Body fragment of the configuration routine (the signature, validation calls, and the
// switch/case labels are not part of this listing).
// The data layout is taken from the input tensor's metadata.
369 _data_layout =
input->info()->data_layout();
// Cache the kernel size and dilation for use at run time.
376 _kernel_width = kernel_dims.
width;
377 _kernel_height = kernel_dims.
height;
378 _dilation = dilation;
// Trailing arguments of a call whose beginning is not shown in this listing.
380 _kernel_width, _kernel_height,
381 _conv_info, _dilation);
// Select the run_im2col instantiation. The second template argument (has_pads) is false when
// conv_info reports no padding; the third (is_nchw) is true for this first group.
// The data-type conditions choosing between the assignments below are not shown.
389 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, true> : &NEIm2ColKernel::run_im2col<float, true, true>;
391 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 393 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, true> : &NEIm2ColKernel::run_im2col<bfloat16, true, true>;
396 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 398 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, true> : &NEIm2ColKernel::run_im2col<float16_t, true, true>;
403 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, true> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, true>;
// Second group: same selection with is_nchw == false.
415 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, false> : &NEIm2ColKernel::run_im2col<float, true, false>;
417 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 419 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, false> : &NEIm2ColKernel::run_im2col<bfloat16, true, false>;
422 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 424 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, false> : &NEIm2ColKernel::run_im2col<float16_t, true, false>;
// NOTE(review): the two assignments below use different element types for the unpadded and
// padded variants (uint8_t / int8_t versus qasymm8_t). Both are presumably 8-bit, so the copies
// would match bit-for-bit, but the naming is inconsistent with the first group -- confirm intent.
428 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<uint8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
431 _func = (!
conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<int8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
// Pass the second member of win_config (presumably the execution window) to the base-class configure.
442 INEKernel::configure(win_config.second);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
size_t x() const
Semantic accessor for width as x.
SimpleTensor< uint8_t > expected_output(output_shape, DataType::QASYMM8, 1, qasymm)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
16-bit brain floating-point number
const size_t input_stride_y
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
const unsigned int num_groups
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
NEIm2ColKernel()
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
size_t y() const
Semantic accessor for height as y.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
const size_t input_stride_z
void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Set the input and output of the kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda function for each element.
T y() const
Alias to access the size of the second dimension.
quantized, asymmetric fixed-point 8-bit number signed
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Iterator updated by execute_window_loop for each window element.
DataLayout
[DataLayout enum definition]
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEIm2ColKernel.
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)