34 if(kernel.
kernel()() ==
nullptr)
39 for(
unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
46 cl::NDRange gws = ICLKernel::gws_from_window(window);
49 if(gws.dimensions() == 0)
55 if(use_dummy_work_items)
61 cl::NDRange valid_lws;
64 valid_lws = cl::NullRange;
71 cl::NDRange lws = cl::NullRange;
73 if((valid_lws[0] <= gws[0]) && (valid_lws[1] <= gws[1]) && (valid_lws[2] <= gws[2]))
78 if(CLKernelLibrary::get().is_wbsm_supported())
82 queue.enqueueNDRangeKernel(kernel.
kernel(), cl::NullRange, gws, lws);
87 template <
unsigned int dimension_size>
88 void ICLKernel::add_tensor_argument(
unsigned &idx,
const ICLTensor *tensor,
const Window &window)
96 unsigned int offset_first_element =
info->offset_first_element_in_bytes();
98 for(
unsigned int n = 0; n <
info->num_dimensions(); ++n)
103 unsigned int idx_start = idx;
104 _kernel.setArg(idx++, tensor->
cl_buffer());
106 for(
unsigned int d = 0; d < dimension_size; ++d)
112 _kernel.setArg<cl_uint>(idx++, offset_first_element);
115 "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor<dimension_size>());
119 #ifndef DOXYGEN_SKIP_THIS 120 template void ICLKernel::add_tensor_argument<1>(
unsigned &idx,
const ICLTensor *tensor,
const Window &window);
121 template void ICLKernel::add_tensor_argument<2>(
unsigned &idx,
const ICLTensor *tensor,
const Window &window);
122 template void ICLKernel::add_tensor_argument<3>(
unsigned &idx,
const ICLTensor *tensor,
const Window &window);
123 template void ICLKernel::add_tensor_argument<4>(
unsigned &idx,
const ICLTensor *tensor,
const Window &window);
133 if(_max_workgroup_size == 0)
137 return _max_workgroup_size;
144 return cl::NullRange;
static cl::NDRange gws_from_window(const Window &window)
Get the global work size given an execution window.
const Window & window() const
The maximum window the kernel can be executed on.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
unsigned int get_next_power_two(unsigned int x)
Given an integer value, this function returns the next power of two.
cl::Kernel & kernel()
Returns a reference to the OpenCL kernel of this object.
constexpr int step() const
Return the step of the dimension.
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
cl_int wbsm_hint() const
Return the workgroup batch size modifier hint.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception is thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg,...)
constexpr const Dimension & z() const
Alias to access the third dimension of the window.
Common interface for all the OpenCL kernels.
Copyright (c) 2017-2021 Arm Limited.
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void end(TokenStream &in, bool &valid)
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
bool is_broadcasted(size_t dimension) const
Return whether a dimension has been broadcasted.
Strides of an item in bytes.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for OpenCL tensor.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
size_t get_max_workgroup_size()
Get the maximum workgroup size for the device the CLKernelLibrary uses.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
void set_target(GPUTarget target)
Set the targeted GPU architecture.
constexpr int end() const
Return the end of the dimension.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.