ComputeLibrary/v21.05/_n_e_im2_col_kernel_8cpp_source.xhtml

 /*
  * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "src/core/NEON/kernels/NEIm2ColKernel.h"

 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 #include "src/core/CPP/Validate.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"

 #include "arm_compute/core/utils/misc/ShapeCalculator.h"

 #include <arm_neon.h>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
 #include <tuple>

 using namespace arm_compute;
 using namespace misc::shape_calculator;

 namespace
 {
 /** Check whether the given tensor infos and convolution parameters form a configuration this kernel accepts.
  *
  * @param[in] input       Source tensor info. Must be single channel with data type QASYMM8, QASYMM8_SIGNED, BFLOAT16, F16 or F32.
  * @param[in] output      Destination tensor info. Must not be nullptr. Its shape, data type and quantization info are
  *                        only checked when its total size is non-zero.
  * @param[in] kernel_dims Kernel width and height.
  * @param[in] conv_info   Padding and stride information.
  * @param[in] has_bias    Whether a bias element is appended to each linearized patch. Rejected for quantized inputs.
  * @param[in] dilation    Dilation along x and y. Each component must be at least 1.
  * @param[in] num_groups  Number of groups. Values greater than one are rejected.
  *
  * @return An empty Status when every check passes, otherwise the error Status returned by the first failing check.
  */
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                           bool has_bias, const Size2D &dilation, unsigned int num_groups)
 {
     // NOTE(review): going by the macro name this rejects F16 inputs on builds without CPU F16 support - confirm in Validate.h
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
     // A bias element cannot be appended when the input is quantized
     ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(input->data_type()) && has_bias);
     ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Number of groups greater than one are not supported on Neon");

     // Since there's no implicit padding added, check the total input spatial dimensions (with conv paddings) are big enough for the kernel dimensions
     // NOTE(review): the comparison uses the plain kernel size; the dilated kernel extent is not taken into account here
     const unsigned int width_idx    = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
     const unsigned int height_idx   = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
     const unsigned     total_width  = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right();
     const unsigned     total_height = input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom();
     ARM_COMPUTE_RETURN_ERROR_ON((total_width < kernel_dims.width) || (total_height < kernel_dims.height));

     // Only an output with a non-zero total size is compared against the expected im2col result
     if(output->total_size() > 0)
     {
         // Expected info: a clone of the output carrying the im2col shape computed from the input (batch_size_on_z = false)
         TensorInfo expected_output = output->clone()->set_tensor_shape(compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, false));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
     }

     return Status{};
 }

 /** Auto-initialize the output info when it is empty and build the execution window of the kernel.
  *
  * The window starts as the maximum window of the input. Its width and height dimensions are then
  * replaced by the convolved output extent, and its channel dimension is collapsed to a single step.
  *
  * @param[in]     input       Source tensor info.
  * @param[in,out] output      Destination tensor info. Initialized from a clone of @p input carrying the im2col shape if empty.
  * @param[in]     kernel_dims Kernel width and height.
  * @param[in]     conv_info   Padding and stride information.
  * @param[in]     has_bias    Whether a bias element is appended to each linearized patch.
  * @param[in]     dilation    Dilation along x and y.
  *
  * @return A pair made of an empty Status and the configured window.
  */
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                                                         bool has_bias, const Size2D &dilation)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

     // Initialize the output (if still empty) from a clone of the input that carries the im2col shape
     const TensorShape im2col_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, false);
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(im2col_shape));

     // Resolve where width/height/channel live for the input data layout
     const DataLayout   layout = input->data_layout();
     const unsigned int idx_w  = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
     const unsigned int idx_h  = get_data_layout_dimension_index(layout, DataLayoutDimension::HEIGHT);
     const unsigned int idx_c  = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL);

     // Spatial extent of the convolution output
     const auto out_dims = scaled_dimensions(input->dimension(idx_w), input->dimension(idx_h),
                                             kernel_dims.width, kernel_dims.height,
                                             conv_info, dilation);
     const unsigned int out_w = out_dims.first;
     const unsigned int out_h = out_dims.second;

     // One window step per output position; the channel dimension is walked inside the kernel
     Window win = calculate_max_window(*input, Steps());
     win.set(idx_w, Window::Dimension(0, out_w, 1));
     win.set(idx_h, Window::Dimension(0, out_h, 1));
     win.set(idx_c, Window::Dimension(0, 1, 1));

     // The NEIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped

     return std::make_pair(Status{}, win);
 }

 /** Linearize one convolution patch of an NCHW tensor into a contiguous run of output elements.
  *
  * For every channel d, kernel row and kernel column (in that order) one element is written to the output:
  * either the input element read at byte offset d * input_stride_z + y * input_stride_y + x * input_stride_x
  * from @p in_ptr, or @p pad_value when @p has_pads is true and (x, y) lies outside [0, input_w) x [0, input_h).
  * Padding is always written element by element, so every output element receives @p pad_value converted to T
  * regardless of sizeof(T).
  *
  * @tparam T        Element type of the input and output.
  * @tparam has_pads When false, no bounds checks are performed and every coordinate is read from the input.
  *
  * @param[in]  in_ptr         Base pointer the byte offsets above are added to.
  * @param[out] out_ptr        First output element to write. kernel_width * kernel_height * kernel_depth elements
  *                            are written, plus one when @p has_bias is true.
  * @param[in]  has_bias       If true, a trailing element with value 1 is appended.
  * @param[in]  top_left_x     X coordinate of the first kernel column (may be negative).
  * @param[in]  top_left_y     Y coordinate of the first kernel row (may be negative).
  * @param[in]  kernel_width   Number of kernel columns.
  * @param[in]  kernel_height  Number of kernel rows.
  * @param[in]  kernel_depth   Number of channels to linearize.
  * @param[in]  input_w        Input width used for the bounds check.
  * @param[in]  input_h        Input height used for the bounds check.
  * @param[in]  input_stride_x Byte stride between consecutive x positions.
  * @param[in]  input_stride_y Byte stride between consecutive y positions.
  * @param[in]  input_stride_z Byte stride between consecutive channels.
  * @param[in]  pad_value      Value written for out-of-bounds coordinates.
  * @param[in]  dilation_x     Step between sampled columns.
  * @param[in]  dilation_y     Step between sampled rows.
  */
 template <typename T, bool has_pads>
 inline void linearize_volume_nchw(const uint8_t *const in_ptr,
                                   T                   *out_ptr,
                                   bool                 has_bias,
                                   int                  top_left_x,
                                   int                  top_left_y,
                                   int                  kernel_width,
                                   int                  kernel_height,
                                   int                  kernel_depth,
                                   int                  input_w,
                                   int                  input_h,
                                   int                  input_stride_x,
                                   int                  input_stride_y,
                                   int                  input_stride_z,
                                   int                  pad_value,
                                   int                  dilation_x,
                                   int                  dilation_y)
 {
     const int kernel_size2 = kernel_width * kernel_height;
     const int x_e          = top_left_x + kernel_width * dilation_x;
     const int y_e          = top_left_y + kernel_height * dilation_y;

     // Linearize volume
     int d = 0;
     // This loop linearizes three channel slices per iteration. This allows:
     // 1) to reduce the iterations of the outer for loop "d"
     // 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs
     for(; d <= (kernel_depth - 3); d += 3)
     {
         for(int y = top_left_y; y < y_e; y += dilation_y)
         {
             if((y < 0 || y >= input_h) && has_pads)
             {
                 // All the values will be the offset (will be zeros when not quantized)
                 for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
                 {
                     *(out_ptr + 0 * kernel_size2) = pad_value;
                     *(out_ptr + 1 * kernel_size2) = pad_value;
                     *(out_ptr + 2 * kernel_size2) = pad_value;
                 }
             }
             else
             {
                 for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
                 {
                     if((x < 0 || x >= input_w) && has_pads)
                     {
                         *(out_ptr + 0 * kernel_size2) = pad_value;
                         *(out_ptr + 1 * kernel_size2) = pad_value;
                         *(out_ptr + 2 * kernel_size2) = pad_value;
                     }
                     else
                     {
                         *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                         *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                         *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                     }
                 }
             }
         }
         // out_ptr advanced by one slice above; skip the two other slices written through the offsets
         out_ptr += 2 * kernel_size2;
     }

     // Left over
     for(; d < kernel_depth; d++)
     {
         for(int y = top_left_y; y < y_e; y += dilation_y)
         {
             if((y < 0 || y >= input_h) && has_pads)
             {
                 // All the values will be the offset (will be zeros when not quantized).
                 // Filled per element, like every other padding branch: a byte-wise memset would
                 // only be correct for single-byte T or a zero pad value.
                 for(int i = 0; i < kernel_width; ++i, ++out_ptr)
                 {
                     *out_ptr = pad_value;
                 }
             }
             else
             {
                 for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
                 {
                     if((x < 0 || x >= input_w) && has_pads)
                     {
                         *out_ptr = pad_value;
                     }
                     else
                     {
                         *out_ptr = *(reinterpret_cast<const T *>(in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
                     }
                 }
             }
         }
     }

     // Append 1 if the convolution layer has biases
     if(has_bias)
     {
         *out_ptr = static_cast<T>(1);
     }
 }

 /** Linearize one convolution patch of an NHWC tensor into a contiguous run of output elements.
  *
  * For every sampled kernel row and column (in that order) @p input_c elements are written to the output:
  * either the input_c elements found at byte offset y * input_stride_z + x * input_stride_y from @p in_ptr,
  * or input_c copies of @p pad_value when (x, y) lies outside [0, input_w) x [0, input_h).
  * Whole kernel rows are copied with a single memcpy when the row is fully inside the input, dilation_x is 1
  * and consecutive x positions are contiguous in memory (input_stride_y == input_c * sizeof(T)).
  * Padding is written element by element, so every output element receives @p pad_value converted to T
  * regardless of sizeof(T).
  *
  * @tparam T        Element type of the input and output.
  * @tparam has_pads Not used by this function; bounds are always checked.
  *
  * @param[in]  in_ptr         Base pointer the byte offsets above are added to.
  * @param[out] out_ptr        First output element to write. kernel_width * kernel_height * input_c elements
  *                            are written, plus one when @p has_bias is true.
  * @param[in]  has_bias       If true, a trailing element with value 1 is appended.
  * @param[in]  start_x        X coordinate of the first kernel column (may be negative).
  * @param[in]  start_y        Y coordinate of the first kernel row (may be negative).
  * @param[in]  kernel_width   Number of kernel columns.
  * @param[in]  kernel_height  Number of kernel rows.
  * @param[in]  input_w        Input width used for the bounds check.
  * @param[in]  input_h        Input height used for the bounds check.
  * @param[in]  input_c        Number of channel elements copied per (x, y) position.
  * @param[in]  input_stride_y Byte stride between consecutive x positions.
  * @param[in]  input_stride_z Byte stride between consecutive y positions.
  * @param[in]  pad_value      Value written for out-of-bounds coordinates.
  * @param[in]  dilation_x     Step between sampled columns.
  * @param[in]  dilation_y     Step between sampled rows.
  */
 template <typename T, bool has_pads>
 inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
                                   T                   *out_ptr,
                                   bool                 has_bias,
                                   int                  start_x,
                                   int                  start_y,
                                   int                  kernel_width,
                                   int                  kernel_height,
                                   int                  input_w,
                                   int                  input_h,
                                   int                  input_c,
                                   int                  input_stride_y,
                                   int                  input_stride_z,
                                   int                  pad_value,
                                   int                  dilation_x,
                                   int                  dilation_y)
 {
     const int end_x        = start_x + kernel_width * dilation_x;
     const int end_y        = start_y + kernel_height * dilation_y;
     const int pad_quant    = kernel_width * input_c;
     const int element_size = static_cast<int>(sizeof(T));

     // Writes `count` padding elements and advances the output cursor.
     // Filled per element: a byte-wise memset would only be correct for single-byte T or a zero pad value.
     const auto write_padding = [&out_ptr, pad_value](int count)
     {
         for(int i = 0; i < count; ++i, ++out_ptr)
         {
             *out_ptr = pad_value;
         }
     };

     if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size))
     {
         for(int y = start_y; y < end_y; y += dilation_y)
         {
             //optimized for no dilation and no boundary pixels
             memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
             out_ptr += input_c * kernel_width;
         }
     }
     else
     {
         // Loop-invariant: rows that are inside the input still need a per-pixel walk when any of these hold
         const bool per_pixel_rows = dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size;

         for(int y = start_y; y < end_y; y += dilation_y)
         {
             if(y < 0 || y >= input_h)
             {
                 // The whole kernel row lies outside the input
                 write_padding(pad_quant);
             }
             else if(per_pixel_rows)
             {
                 for(int x = start_x; x < end_x; x += dilation_x)
                 {
                     if(x < 0 || x >= input_w)
                     {
                         write_padding(input_c);
                     }
                     else
                     {
                         memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size);
                         out_ptr += input_c;
                     }
                 }
             }
             else
             {
                 //optimized for no dilation and no boundary pixels
                 memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
                 out_ptr += input_c * kernel_width;
             }
         }
     }
     // Append 1 if the convolution layer has biases
     if(has_bias)
     {
         *out_ptr = static_cast<T>(1);
     }
 }
 } // namespace

 template <typename T, bool has_pads, bool is_nchw>
 void NEIm2ColKernel::run_im2col(const Window &window)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

     const unsigned int width_idx   = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
     const unsigned int height_idx  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
     const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);

     const int input_w        = _input->info()->dimension(width_idx);
     const int input_h        = _input->info()->dimension(height_idx);
     const int input_c        = _input->info()->dimension(channel_idx);
     const int input_stride_x = _input->info()->strides_in_bytes().x();
     const int input_stride_y = _input->info()->strides_in_bytes().y();
     const int input_stride_z = _input->info()->strides_in_bytes().z();
     const int pad_left       = _conv_info.pad_left();
     const int pad_top        = _conv_info.pad_top();
     const int stride_x       = _conv_info.stride().first;
     const int stride_y       = _conv_info.stride().second;
     const int pad_value      = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0;

     Window window_in_out(window);
     // The first three dimensions of the input and output are increased by the inner loops
     window_in_out.set(Window::DimX, Window::Dimension(0, 0, 0));
     window_in_out.set(Window::DimY, Window::Dimension(0, 0, 0));
     window_in_out.set(Window::DimZ, Window::Dimension(0, 0, 0));

     // Create iterators
     Iterator in(_input, window_in_out);
     Iterator out(_output, window_in_out);

     execute_window_loop(window, [&](const Coordinates & id)
     {
         const int start_w = id[width_idx] * stride_x - pad_left;
         const int start_h = id[height_idx] * stride_y - pad_top;

         // Get pointers
         const uint8_t *const input_ptr  = in.ptr();
         auto                 output_ptr = reinterpret_cast<T *>(out.ptr() + (id[width_idx] + id[height_idx] * _convolved_dims.first) * _output->info()->strides_in_bytes().y());

         // Linearize volume
         if(is_nchw)
         {
             linearize_volume_nchw<T, has_pads>(input_ptr,
                                                output_ptr,
                                                _has_bias,
                                                start_w,
                                                start_h,
                                                _kernel_width,
                                                _kernel_height,
                                                input_c,
                                                input_w,
                                                input_h,
                                                input_stride_x,
                                                input_stride_y,
                                                input_stride_z,
                                                pad_value,
                                                _dilation.x(),
                                                _dilation.y());
         }
         else
         {
             linearize_volume_nhwc<T, has_pads>(input_ptr,
                                                output_ptr,
                                                _has_bias,
                                                start_w,
                                                start_h,
                                                _kernel_width,
                                                _kernel_height,
                                                input_w,
                                                input_h,
                                                input_c,
                                                input_stride_y,
                                                input_stride_z,
                                                pad_value,
                                                _dilation.x(),
                                                _dilation.y());
         }
     },
     in, out);
 }

 /** Default constructor: no tensors bound, zero-sized kernel, unit dilation and unknown data layout. */
 NEIm2ColKernel::NEIm2ColKernel()
     : _func(),
       _input(nullptr),
       _output(nullptr),
       _convolved_dims(),
       _conv_info(),
       _kernel_width(0),
       _kernel_height(0),
       _has_bias(false),
       _dilation(1U, 1U),
       _data_layout(DataLayout::UNKNOWN)
 {
 }

 void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                                bool has_bias, const Size2D &dilation, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups));
     ARM_COMPUTE_UNUSED(num_groups);

     _data_layout                  = input->info()->data_layout();
     const unsigned int width_idx  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
     const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);

     _input          = input;
     _output         = output;
     _conv_info      = conv_info;
     _kernel_width   = kernel_dims.width;
     _kernel_height  = kernel_dims.height;
     _dilation       = dilation;
     _convolved_dims = scaled_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx),
                                         _kernel_width, _kernel_height,
                                         _conv_info, _dilation);
     _has_bias = has_bias;

     if(_data_layout == DataLayout::NCHW)
     {
         switch(_input->info()->data_type())
         {
             case DataType::F32:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, true> : &NEIm2ColKernel::run_im2col<float, true, true>;
                 break;
 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
             case DataType::BFLOAT16:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, true> : &NEIm2ColKernel::run_im2col<bfloat16, true, true>;
                 break;
 #endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
             case DataType::F16:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, true> : &NEIm2ColKernel::run_im2col<float16_t, true, true>;
                 break;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
             case DataType::QASYMM8_SIGNED:
             case DataType::QASYMM8:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, true> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, true>;
                 break;
             default:
                 ARM_COMPUTE_ERROR("Data type not supported");
                 break;
         }
     }
     else
     {
         switch(_input->info()->data_type())
         {
             case DataType::F32:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float, false, false> : &NEIm2ColKernel::run_im2col<float, true, false>;
                 break;
 #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
             case DataType::BFLOAT16:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<bfloat16, false, false> : &NEIm2ColKernel::run_im2col<bfloat16, true, false>;
                 break;
 #endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
             case DataType::F16:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, false> : &NEIm2ColKernel::run_im2col<float16_t, true, false>;
                 break;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
             case DataType::QASYMM8:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<uint8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
                 break;
             case DataType::QASYMM8_SIGNED:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<int8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
                 break;
             default:
                 ARM_COMPUTE_ERROR("Data type not supported");
                 break;
         }
     }

     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     INEKernel::configure(win_config.second);
 }

 /** Check whether the given infos and parameters would lead to a valid configuration of the kernel.
  *
  * The window configuration step is run on clones of @p input and @p output, so the caller's infos are left untouched.
  *
  * @return An empty Status on success, otherwise the first error Status encountered.
  */
 Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                                 bool has_bias, const Size2D &dilation, unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups));

     // Work on private copies: configuring the window may auto-initialize the output info
     const auto input_clone  = input->clone();
     const auto output_clone = output->clone();
     ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input_clone.get(), output_clone.get(), kernel_dims, conv_info, has_bias, dilation).first);

     return Status{};
 }

 /** Execute the kernel on the passed window by dispatching to the run function selected in configure().
  *
  * @param[in] window Region to execute; must be a valid sub-window of the configured window.
  * @param[in] info   Thread information; not used by this kernel.
  */
 void NEIm2ColKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

     // Dispatch through the member-function pointer chosen at configure time
     const auto selected_fn = _func;
     (this->*selected_fn)(window);
 }
arm_compute::is_data_type_quantized
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:967

arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:28

WindowHelpers.h

arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28

ITensor.h

ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:108

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
Definition: Validate.h:606

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

arm_compute::test::validation::conv_info
conv_info
Definition: Winograd.cpp:647

ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204

arm_compute::ITensorInfo::data_type
virtual DataType data_type() const =0
Data type used for each element of the tensor.

arm_compute::Format::F32
1 channel, 1 F32 per channel

arm_compute::DataLayoutDimension::HEIGHT
height

arm_compute::test::validation::data_layout
const DataLayout data_layout
Definition: Im2Col.cpp:151

arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:40

ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455

arm_compute::Window::Dimension
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77

arm_compute::Size2D::x
size_t x() const
Semantic accessor for width as x.
Definition: Size2D.h:74

arm_compute::Status
Status class.
Definition: Error.h:52

arm_compute::test::validation::expected_output
SimpleTensor< uint8_t > expected_output(output_shape, DataType::QASYMM8, 1, qasymm)

TensorInfo.h

ARM_COMPUTE_RETURN_ERROR_ON
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296

arm_compute::ITensor
Interface for CPU tensor.
Definition: ITensor.h:36

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: introduction.dox:24

arm_compute::Size2D::height
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:90

arm_compute::Format::F16
1 channel, 1 F16 per channel

Size2D.h

arm_compute::scaled_dimensions
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
Definition: Utils.cpp:395

arm_compute::Tensor::info
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: Tensor.cpp:33

arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159

arm_compute::Format::BFLOAT16
16-bit brain floating-point number

input_stride_y
const size_t input_stride_y
Definition: CpuDepthwiseConvolutionNativeKernel.cpp:62

arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43

ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152

arm_compute::DataType::QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned

arm_compute::Steps
Class to describe a number of elements in each dimension.
Definition: Steps.h:40

arm_compute::test::validation::num_groups
const unsigned int num_groups
Definition: Im2Col.cpp:153

arm_compute::Coordinates
Coordinates of an item.
Definition: Coordinates.h:37

arm_compute::utils::cast::U
U
Definition: SaturateCast.h:57

arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: AutoConfiguration.h:42

ShapeCalculator.h

arm_compute::misc::ICloneable::clone
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.

arm_compute::NEIm2ColKernel::NEIm2ColKernel
NEIm2ColKernel()
Default constructor.
Definition: NEIm2ColKernel.cpp:357

Validate.h

arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.

arm_compute::PadStrideInfo
Padding and stride information class.
Definition: Types.h:650

arm_compute::test::validation::has_bias
const bool has_bias
Definition: Im2Col.cpp:152

Error.h

arm_compute::DataLayoutDimension::CHANNEL
channel

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915

arm_compute::DataLayout::NCHW
Num samples, channels, height, width.

arm_compute::Size2D::y
size_t y() const
Semantic accessor for height as y.
Definition: Size2D.h:83

arm_compute::Window::DimY
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45

arm_compute::NEIm2ColKernel::run
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Definition: NEIm2ColKernel.cpp:453

arm_compute::test::validation::info
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)

AutoConfiguration.h

arm_compute::ThreadInfo
Information about executing thread and CPU.
Definition: CPPTypes.h:252

arm_compute::ITensorInfo::total_size
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:439

arm_compute::Size2D::width
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:89

arm_compute::Window::DimZ
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47

arm_compute::CLVersion::UNKNOWN

arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788

arm_compute::validate_arguments
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Definition: NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp:45

input_stride_z
const size_t input_stride_z
Definition: CpuDepthwiseConvolutionNativeKernel.cpp:63

Validate.h

NEIm2ColKernel.h

arm_compute::NEIm2ColKernel::configure
void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Set the input and output of the kernel.
Definition: NEIm2ColKernel.cpp:362

ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244

arm_compute::DataLayoutDimension::WIDTH
width

ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157

arm_compute::TensorInfo
Store the tensor's metadata.
Definition: TensorInfo.h:43

Helpers.h

arm_compute::execute_window_loop
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Definition: Helpers.inl:77

arm_compute::Dimensions::y
T y() const
Alias to access the size of the second dimension.
Definition: Dimensions.h:92

arm_compute::DataType::QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed

arm_compute::ITensorInfo::strides_in_bytes
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.

arm_compute::get_data_layout_dimension_index
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193

arm_compute::Iterator
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46

Types.h

arm_compute::DataLayout
DataLayout
[DataLayout enum definition]
Definition: Types.h:114

arm_compute::NEIm2ColKernel::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEIm2ColKernel.
Definition: NEIm2ColKernel.cpp:445

arm_compute::misc::shape_calculator::compute_im2col_conv_shape
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
Definition: ShapeCalculator.h:522

arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201