54 if(output->total_size() != 0)
72 unsigned int num_elems_processed_per_iteration_x =
adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
75 return std::make_pair(Status{}, win_collapsed);
79 const unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / input->element_size();
80 constexpr
unsigned int num_elems_processed_per_iteration_y = 2;
83 Window win =
calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
84 AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
85 AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
92 return std::make_pair(err, win_collapsed);
98 : _input(nullptr), _output(nullptr)
120 unsigned int vec_size_x = 0;
121 unsigned int vec_size_x_leftovers = 0;
125 vec_size_x_leftovers = input->
info()->
dimension(0) % vec_size_x;
150 ICLKernel::configure_internal(win_config.second);
189 unsigned int idx = 0;
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
const Window & window() const
The maximum window the kernel can be executed on.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
Static function to check if given info will lead to a valid configuration of CLChannelShuffleLayerKernel.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
const StringSet & options() const
Gets the current options list set.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
std::string lower_string(const std::string &val)
Lower a given string.
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Copyright (c) 2017-2022 Arm Limited.
void add_option(std::string option)
Adds option to the existing build option list.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an OpenCL kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
Configure function's inputs and outputs.
const unsigned int num_groups
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Elementwise CL kernel type.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
std::pair< Status, Window > validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst)
Interface for OpenCL tensor.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo *> infos)
Stores padding information before configuring a kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
CLChannelShuffleLayerKernel()
Default constructor.
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx...
DataLayout
[DataLayout enum definition]
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.