47 using namespace misc::shape_calculator;
// validate_arguments (fragment of an extracted listing; intermediate lines and braces are not shown here).
// Returns a Status. The visible parameters are the input and output tensor infos, the kernel
// dimensions and the pad/stride information.
54 Status validate_arguments(
const ITensorInfo *
input,
const ITensorInfo *output,
const Size2D &kernel_dims,
const PadStrideInfo &
conv_info,
// Padded spatial extent of the input: the input dimension plus the padding on both sides of that axis.
67 const unsigned total_width = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right();
68 const unsigned total_height = input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom();
// Guard on a non-empty output; presumably the output-specific checks that follow are skipped
// when the output info has not been initialised yet — confirm against the full source.
71 if(output->total_size() > 0)
// linearize_volume_nchw<T, has_pads> (fragment of an extracted listing; braces and some lines are not shown).
// Walks a kernel-sized, dilated window of the input starting at (top_left_x, top_left_y) and writes the
// elements of type T sequentially through out_ptr. When has_pads is true, coordinates that fall outside
// [0, input_w) x [0, input_h) are written as pad_value instead of being read from in_ptr.
82 template <
typename T,
bool has_pads>
83 inline void linearize_volume_nchw(
const uint8_t *
const in_ptr,
// Number of elements in one kernel plane, and the exclusive end coordinates of the dilated window.
100 const int kernel_size2 = kernel_width * kernel_height;
101 const int x_e = top_left_x + kernel_width * dilation_x;
102 const int y_e = top_left_y + kernel_height * dilation_y;
// Main loop: handle three depth slices per iteration. The three output planes are addressed
// at offsets 0, 1 and 2 times kernel_size2 from the same running out_ptr.
109 for(; d <= (kernel_depth - 3); d += 3)
111 for(
int y = top_left_y; y < y_e; y += dilation_y)
// Row lies outside the input: every element of this kernel row is padding in all three planes.
113 if((y < 0 || y >= input_h) && has_pads)
116 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
118 *(out_ptr + 0 * kernel_size2) = pad_value;
119 *(out_ptr + 1 * kernel_size2) = pad_value;
120 *(out_ptr + 2 * kernel_size2) = pad_value;
// Row is inside the input: check each column individually.
125 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
127 if((x < 0 || x >= input_w) && has_pads)
129 *(out_ptr + 0 * kernel_size2) = pad_value;
130 *(out_ptr + 1 * kernel_size2) = pad_value;
131 *(out_ptr + 2 * kernel_size2) = pad_value;
// In-bounds element: in_ptr is a byte pointer, so the offset is built from the byte strides
// for depth (z), row (y) and column (x) before reinterpreting the address as T.
135 *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
136 *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
137 *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
// The ++out_ptr in the x loops appears to have advanced out_ptr by one full plane already;
// skip the two further planes that were written through the fixed offsets above.
142 out_ptr += 2 * kernel_size2;
// Tail loop: remaining depth slices (fewer than three), one plane at a time.
146 for(; d < kernel_depth; d++)
148 for(
int y = top_left_y; y < y_e; y += dilation_y)
150 if((y < 0 || y >= input_h) && has_pads)
// Fill the whole out-of-bounds kernel row in one call.
// NOTE(review): memset writes pad_value as a single byte repeated across kernel_width * sizeof(T)
// bytes, so for multi-byte T this only yields pad_value per element when pad_value is 0 — confirm
// that non-zero pad values only occur with single-byte T.
153 memset(static_cast<void *>(out_ptr), pad_value, kernel_width *
sizeof(T));
154 out_ptr += kernel_width;
158 for(
int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
160 if((x < 0 || x >= input_w) && has_pads)
162 *out_ptr = pad_value;
166 *out_ptr = *(
reinterpret_cast<const T *
>(in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
// Writes a trailing 1 after the linearized data; the enclosing condition is not visible in this
// listing — presumably this is the bias term appended when a bias is requested (TODO confirm).
176 *out_ptr =
static_cast<T
>(1);
// linearize_volume_nhwc<T, has_pads> (fragment of an extracted listing; braces and some lines are not shown).
// Copies a kernel window starting at (start_x, start_y) into out_ptr using memcpy for in-bounds data
// and memset with pad_value for out-of-bounds data. In the visible address computations the row index y
// is scaled by input_stride_z and the column index x by input_stride_y.
180 template <
typename T,
bool has_pads>
181 inline void linearize_volume_nhwc(
const uint8_t *
const in_ptr,
// Exclusive end coordinates of the dilated window.
197 const int end_x = start_x + kernel_width * dilation_x;
198 const int end_y = start_y + kernel_height * dilation_y;
// Number of elements in one kernel row across all channels.
199 const int pad_quant = kernel_width * input_c;
200 const int element_size =
static_cast<int>(
sizeof(T));
// Fast path: window within bounds, no dilation along x, and consecutive columns are exactly
// input_c elements apart in memory, so each kernel row is one contiguous run of bytes.
// The comparisons against input_h / input_w are strict, so a window whose end coordinate equals
// the input size does not take this path and is handled by the general loop below instead.
201 if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size))
203 for(
int y = start_y; y < end_y; y += dilation_y)
// One memcpy per kernel row: kernel_width pixels times input_c channels.
206 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
207 out_ptr += input_c * kernel_width;
// General path: decide per row (and if needed per pixel) between padding and copying.
212 for(
int y = start_y; y < end_y; y += dilation_y)
// Row outside the input: the whole kernel row is padding.
// memset stores pad_value as a repeated byte over pad_quant * element_size bytes.
214 if(y < 0 || y >= input_h)
216 memset(static_cast<void *>(out_ptr), pad_value, pad_quant * element_size);
217 out_ptr += pad_quant;
// Row inside the input, but the row cannot be copied as one contiguous run
// (dilation along x, window crossing the left/right border, or non-contiguous pixels).
219 else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size)
221 for(
int x = start_x; x < end_x; x += dilation_x)
// Pixel outside the input: pad all of its input_c channels.
223 if(x < 0 || x >= input_w)
225 memset(static_cast<void *>(out_ptr), pad_value, input_c * element_size);
// Pixel inside the input: copy its input_c channels.
230 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size);
// Remaining case: the row is in bounds and contiguous, so copy it with a single memcpy.
238 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
239 out_ptr += input_c * kernel_width;
// Writes a trailing 1 after the linearized data; the enclosing condition is not visible in this
// listing — presumably this is the bias term appended when a bias is requested (TODO confirm).
246 *out_ptr =
static_cast<T
>(1);
// CpuIm2ColKernel::run_im2col<T, has_pads, is_nchw> (fragment of an extracted listing; braces, the
// window-loop call and the helper argument lists are not shown).
// Reads the source geometry and convolution parameters, then, for each position id, computes the
// top-left input coordinate and the destination row and hands them to one of the linearize helpers.
251 template <
typename T,
bool has_pads,
bool is_nchw>
252 void CpuIm2ColKernel::run_im2col(
const ITensor *
src, ITensor *
dst,
const Window &window)
// Source extents, looked up through the width/height/channel dimension indices.
261 const int input_w = src->info()->dimension(width_idx);
262 const int input_h = src->info()->dimension(height_idx);
263 const int input_c = src->info()->dimension(channel_idx);
// Source strides in bytes for the first three dimensions.
264 const int input_stride_x = src->info()->strides_in_bytes().x();
265 const int input_stride_y = src->info()->strides_in_bytes().y();
266 const int input_stride_z = src->info()->strides_in_bytes().z();
// Convolution padding (left/top) and strides taken from the stored _conv_info.
267 const int pad_left = _conv_info.pad_left();
268 const int pad_top = _conv_info.pad_top();
269 const int stride_x = _conv_info.stride().first;
270 const int stride_y = _conv_info.stride().second;
// Padding value: the uniform quantization offset for quantized data types, 0 otherwise.
271 const int pad_value =
is_data_type_quantized(src->info()->data_type()) ? src->info()->quantization_info().uniform().offset : 0;
// Copy of the execution window with X, Y and Z replaced by Dimension(0, 0, 0); the iterators below
// are built on this copy, while addresses inside the first three dimensions are computed manually.
273 Window window_in_out(window);
275 window_in_out.set(
Window::DimX, Window::Dimension(0, 0, 0));
276 window_in_out.set(
Window::DimY, Window::Dimension(0, 0, 0));
277 window_in_out.set(
Window::DimZ, Window::Dimension(0, 0, 0));
280 Iterator in(src, window_in_out);
281 Iterator out(dst, window_in_out);
// Top-left input coordinate of the kernel window for position id; it becomes negative inside the
// left/top padding region.
285 const int start_w =
id[width_idx] * stride_x - pad_left;
286 const int start_h =
id[height_idx] * stride_y - pad_top;
289 const uint8_t *
const input_ptr = in.ptr();
// Destination row for this position: linear index id[width_idx] + id[height_idx] * _convolved_dims.first,
// scaled by the destination Y stride in bytes (_convolved_dims.first is presumably the convolved width).
290 auto output_ptr =
reinterpret_cast<T *
>(out.ptr() + (
id[width_idx] +
id[height_idx] * _convolved_dims.first) * dst->info()->strides_in_bytes().y());
// The two helper calls below are presumably selected by the is_nchw template parameter; the
// selecting condition is not visible in this listing.
295 linearize_volume_nchw<T, has_pads>(input_ptr,
314 linearize_volume_nhwc<T, has_pads>(input_ptr,
// Fragment of a routine that stores the kernel geometry and selects the run function (presumably
// CpuIm2ColKernel::configure — its name, the switch/case labels and the braces are not visible in
// this listing).
335 bool has_bias,
const Size2D &dilation,
unsigned int num_groups)
// Cache the kernel extents and dilation in member variables.
347 _kernel_width = kernel_dims.
width;
348 _kernel_height = kernel_dims.
height;
349 _dilation = dilation;
// Trailing arguments of a call whose beginning is not shown in this listing.
351 _kernel_width, _kernel_height,
352 _conv_info, _dilation);
// First group of assignments: run_im2col instantiations whose third template argument (is_nchw)
// is true. In every assignment the second argument (has_pads) is false when conv_info reports no
// padding and true otherwise.
360 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, true> : &CpuIm2ColKernel::run_im2col<float, true, true>;
362 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 364 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, true> : &CpuIm2ColKernel::run_im2col<bfloat16, true, true>;
367 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 369 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, true> : &CpuIm2ColKernel::run_im2col<float16_t, true, true>;
374 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<qasymm8_t, false, true> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, true>;
// Second group of assignments: the same selection with is_nchw set to false.
386 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, false> : &CpuIm2ColKernel::run_im2col<float, true, false>;
388 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) 390 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, false> : &CpuIm2ColKernel::run_im2col<bfloat16, true, false>;
393 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 395 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, false> : &CpuIm2ColKernel::run_im2col<float16_t, true, false>;
// NOTE(review): in the two 8-bit assignments below the unpadded branch instantiates uint8_t / int8_t
// while the padded branch instantiates qasymm8_t in both cases. Confirm this asymmetry is intentional
// (all three are presumably single-byte types, so the copied bytes would be the same).
399 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<uint8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
402 _func = (!conv_info.
has_padding()) ? &CpuIm2ColKernel::run_im2col<int8_t, false, false> : &CpuIm2ColKernel::run_im2col<qasymm8_t, true, false>;
426 bool has_bias,
const Size2D &dilation,
unsigned int num_groups)
440 (this->*_func)(src, dst, window);
444 return "CpuIm2ColKernel";
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
const size_t input_stride_y
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
const size_t input_stride_z
SimpleTensor< uint8_t > expected_output(output_shape, DataType::QASYMM8, 1, qasymm)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
16-bit brain floating-point number
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
const unsigned int num_groups
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
Padding and stride information class.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
quantized, asymmetric fixed-point 8-bit number signed
im2col_func configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias)
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
bool has_padding() const
Check whether this has any padding.