51 Status validate_arguments(
const ITensorInfo *
src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *
dst,
52 const PadStrideInfo &
conv_info,
const ActivationLayerInfo &act_info)
63 ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx),
"Weights feature map dimension should match the respective src's one");
69 ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 1) && std::get<0>(conv_info.stride()) > 3,
"Strides larger than 3 not supported for 1x1 convolution.");
70 ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 || weights->dimension(width_idx) == 9) && std::get<0>(conv_info.stride()) > 2,
71 "Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution.");
76 ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 && weights->dimension(width_idx) != 5 && weights->dimension(width_idx) != 9,
77 "Kernel sizes other than 1x1, 3x3, 5x5 or 9x9 are not supported with quantized data types");
82 "Kernel sizes other than 1x1, 3x3 or 5x5 are not supported with float data types");
97 "Biases size and number of dst feature maps should match");
99 "Biases should be one dimensional");
103 if(dst->total_size() != 0)
113 const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
114 const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
115 const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
117 float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
118 int output_multiplier = 0;
119 int output_shift = 0;
125 bool export_to_cl_image_support(ITensorInfo *tensor,
GPUTarget gpu_target,
DataLayout data_layout)
155 const size_t image_w = tensor->tensor_shape()[0] / 4;
156 const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
160 if(image_w > max_image_w || image_h > max_image_h)
192 const unsigned int kernel_size = weights->
dimension(width_idx);
196 unsigned int _num_elems_processed_per_iteration = 0;
211 const unsigned int vec_size = std::min(static_cast<unsigned int>(dst->
tensor_shape()[0]), 4u);
212 unsigned int num_rows = 1
U;
223 _num_elems_processed_per_iteration = 1u;
227 ICLKernel::configure_internal(win);
234 kernel_name <<
"direct_convolution_nhwc";
236 const unsigned int n0 = win.x().step();
237 const unsigned int m0 = win.y().step();
239 const unsigned int partial_store_n0 = dst->
dimension(channel_idx) % n0;
240 const unsigned int pad_left = conv_info.
pad_left();
241 const unsigned int pad_top = conv_info.
pad_top();
242 const bool export_to_cl_image = export_to_cl_image_support(weights, gpu_target, _data_layout);
245 if(export_to_cl_image)
250 if(biases !=
nullptr)
252 build_options.
add_option(std::string(
"-DHAS_BIAS"));
256 build_options.
add_option(
"-cl-fast-relaxed-math");
257 build_options.
add_option(
"-DSRC_TENSOR_TYPE=BUFFER");
259 build_options.
add_option(
"-DDST_TENSOR_TYPE=BUFFER");
261 build_options.
add_option_if_else(export_to_cl_image,
"-DWEI_TENSOR_TYPE=IMAGE",
"-DWEI_TENSOR_TYPE=BUFFER");
284 zero_value.
get(zero_value_s32);
287 int output_multiplier = 0;
288 int output_shift = 0;
312 kernel_name <<
"direct_convolution_nchw";
313 build_options.
add_option_if(biases !=
nullptr, std::string(
"-DHAS_BIAS"));
338 int output_multiplier = 0;
339 int output_shift = 0;
354 _config_id = kernel_name.str();
401 cl::Image2D weights_cl_image;
403 const size_t dim_y_collapsed =
ceil_to_multiple(dst->info()->dimension(1) * dst->info()->dimension(2), slice.
y().
step());
409 if(export_to_cl_image)
411 const size_t image_w = weights->info()->dimension(0) / 4;
412 const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
414 const size_t image_row_pitch = weights->info()->strides_in_bytes()[1];
420 unsigned int idx = 0;
423 if(export_to_cl_image)
425 _kernel.setArg(idx++, weights_cl_image);
428 if(biases !=
nullptr)
439 if(biases !=
nullptr)
446 _kernel.setArg(idx1++, static_cast<unsigned int>(weights->info()->strides_in_bytes()[3]));
450 unsigned int idx = 0;
void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor)
Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments by passing strides...
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Class describing the value of a pixel for any image format.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
const Window & window() const
The maximum window the kernel can be executed on.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
const StringSet & options() const
Gets the current options list set.
constexpr int step() const
Return the step of the dimension.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
float a() const
Get the alpha value.
void get(uint8_t &v) const
Interpret the pixel value as a U8.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
unsigned int pad_top() const
Get the top padding.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
std::string lower_string(const std::string &val)
Lower a given string.
Activation Layer Information class.
std::set< std::string > build_options
void update_padding_for_cl_image(ITensorInfo *tensor)
Update padding required to export the OpenCL buffer to OpenCL image2d.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
Set the src, weights, biases and dst tensors info.
1 channel, 1 F16 per channel
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger than or equal to value that is a multiple of divisor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
GPUTarget get_target() const
Get the targeted GPU architecture.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual BorderSize border_size() const
The size of the border for that kernel.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
Padding and stride information class.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
const size_t conv_stride_x
Num samples, channels, height, width.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
GPUTarget
Available GPU Targets.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
Create a cl::Image2D object from an OpenCL buffer.
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
quantized, asymmetric fixed-point 8-bit number signed
const size_t conv_stride_y
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx...
Window first_slice_window_3D() const
First 3D slice of the window.
DataType
Available data types.
unsigned int pad_left() const
Get the left padding.
DataLayout
[DataLayout enum definition]
Describe a multidimensional execution window.
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the deep convolution output shape of a tensor.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
const cl::Device & get_device()
Gets the CL device for which the programs are created.
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds the first option if the condition is true, otherwise the second one.