46 using namespace misc::shape_calculator;
79 bool has_bias,
const Size2D &dilation)
103 return std::make_pair(
Status{}, win);
106 template <
typename T,
bool has_pads>
107 inline void linearize_volume_nchw(
const uint8_t *
const in_ptr,
124 const int kernel_size2 = kernel_width * kernel_height;
125 const int x_e = top_left_x + kernel_width * dilation_x;
126 const int y_e = top_left_y + kernel_height * dilation_y;
133 for(; d <= (kernel_depth - 3); d += 3)
135 for(
int y = top_left_y; y < y_e; y += dilation_y)
137 if((y < 0 || y >= input_h) && has_pads)
140 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
142 *(out_ptr + 0 * kernel_size2) = pad_value;
143 *(out_ptr + 1 * kernel_size2) = pad_value;
144 *(out_ptr + 2 * kernel_size2) = pad_value;
149 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
151 if((x < 0 || x >= input_w) && has_pads)
153 *(out_ptr + 0 * kernel_size2) = pad_value;
154 *(out_ptr + 1 * kernel_size2) = pad_value;
155 *(out_ptr + 2 * kernel_size2) = pad_value;
159 *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
160 *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
161 *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
166 out_ptr += 2 * kernel_size2;
170 for(; d < kernel_depth; d++)
172 for(
int y = top_left_y; y < y_e; y += dilation_y)
174 if((y < 0 || y >= input_h) && has_pads)
177 memset(static_cast<void *>(out_ptr), pad_value, kernel_width *
sizeof(T));
178 out_ptr += kernel_width;
182 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
184 if((x < 0 || x >= input_w) && has_pads)
186 *out_ptr = pad_value;
190 *out_ptr = *(
reinterpret_cast<const T *
>(in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
200 *out_ptr =
static_cast<T
>(1);
204 template <
typename T,
bool has_pads>
205 inline void linearize_volume_nhwc(
const uint8_t *
const in_ptr,
221 const int end_x = start_x + kernel_width * dilation_x;
222 const int end_y = start_y + kernel_height * dilation_y;
223 const int pad_quant = kernel_width * input_c;
224 const int element_size =
static_cast<int>(
sizeof(T));
225 if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size))
227 for(
int y = start_y; y < end_y; y += dilation_y)
230 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
231 out_ptr += input_c * kernel_width;
236 for(
int y = start_y; y < end_y; y += dilation_y)
238 if(y < 0 || y >= input_h)
240 memset(static_cast<void *>(out_ptr), pad_value, pad_quant * element_size);
241 out_ptr += pad_quant;
243 else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size)
245 for(
int x = start_x; x < end_x; x += dilation_x)
247 if(x < 0 || x >= input_w)
249 memset(static_cast<void *>(out_ptr), pad_value, input_c * element_size);
254 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size);
262 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
263 out_ptr += input_c * kernel_width;
270 *out_ptr =
static_cast<T
>(1);
275 template <
typename T,
bool has_pads,
bool is_nchw>
276 void NEIm2ColKernel::run_im2col(
const Window &window)
285 const int input_w = _input->info()->dimension(width_idx);
286 const int input_h = _input->info()->dimension(height_idx);
287 const int input_c = _input->info()->dimension(channel_idx);
288 const int input_stride_x = _input->info()->strides_in_bytes().x();
289 const int input_stride_y = _input->info()->strides_in_bytes().y();
290 const int input_stride_z = _input->info()->strides_in_bytes().z();
291 const int pad_left = _conv_info.pad_left();
292 const int pad_top = _conv_info.pad_top();
293 const int stride_x = _conv_info.stride().first;
294 const int stride_y = _conv_info.stride().second;
295 const int pad_value =
is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0;
297 Window window_in_out(window);
305 Iterator out(_output, window_in_out);
309 const int start_w =
id[width_idx] * stride_x - pad_left;
310 const int start_h =
id[height_idx] * stride_y - pad_top;
313 const uint8_t *
const input_ptr = in.ptr();
314 auto output_ptr =
reinterpret_cast<T *
>(out.ptr() + (
id[width_idx] +
id[height_idx] * _convolved_dims.first) * _output->
info()->
strides_in_bytes().
y());
319 linearize_volume_nchw<T, has_pads>(input_ptr,
338 linearize_volume_nhwc<T, has_pads>(input_ptr,
359 : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1
U, 1
U), _data_layout(
DataLayout::
UNKNOWN)
364 bool has_bias,
const Size2D &dilation,
unsigned int num_groups)
377 _kernel_width = kernel_dims.
width;
378 _kernel_height = kernel_dims.
height;
379 _dilation = dilation;
381 _kernel_width, _kernel_height,
382 _conv_info, _dilation);
390 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, true> : &NEIm2ColKernel::run_im2col<float, true, true>;
392 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 394 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, true> : &NEIm2ColKernel::run_im2col<bfloat16, true, true>;
397 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 399 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, true> : &NEIm2ColKernel::run_im2col<float16_t, true, true>;
404 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, true> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, true>;
416 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, false> : &NEIm2ColKernel::run_im2col<float, true, false>;
418 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 420 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, false> : &NEIm2ColKernel::run_im2col<bfloat16, true, false>;
423 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 425 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, false> : &NEIm2ColKernel::run_im2col<float16_t, true, false>;
429 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<uint8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
432 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<int8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
441 auto win_config = validate_and_configure_window(input->
info(), output->
info(), kernel_dims,
conv_info,
has_bias, dilation);
443 INEKernel::configure(win_config.second);
447 bool has_bias,
const Size2D &dilation,
unsigned int num_groups)
460 (this->*_func)(window);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
size_t x() const
Semantic accessor for width as x.
unsigned int pad_top() const
Get the top padding.
SimpleTensor< uint8_t > expected_output(output_shape, DataType::QASYMM8, 1, qasymm)
const size_t input_stride_y
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
virtual void set_valid_region(const ValidRegion &valid_region)=0
Set the valid region of the tensor.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns the expected width and height of the output scaled tensor, depending on the dimensions rounding mode.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
16-bit brain floating-point number
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
const unsigned int num_groups
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
NEIm2ColKernel()
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
size_t y() const
Semantic accessor for height as y.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Set the input and output of the kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
const size_t input_stride_z
T y() const
Alias to access the size of the second dimension.
quantized, asymmetric fixed-point 8-bit number signed
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Container for valid region of a window.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
unsigned int pad_bottom() const
Get the bottom padding.
Iterator updated by execute_window_loop for each window element.
unsigned int pad_left() const
Get the left padding.
DataLayout
[DataLayout enum definition]
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEIm2ColKernel.
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.