34 if(kernel.
kernel()() ==
nullptr)
39 for(
unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
46 cl::NDRange gws = ICLKernel::gws_from_window(window);
49 if(gws.dimensions() == 0)
55 if(use_dummy_work_items)
61 cl::NDRange valid_lws;
64 valid_lws = cl::NullRange;
71 cl::NDRange lws = cl::NullRange;
73 if((valid_lws[0] <= gws[0]) && (valid_lws[1] <= gws[1]) && (valid_lws[2] <= gws[2]))
78 if(CLKernelLibrary::get().is_wbsm_supported())
82 queue.enqueueNDRangeKernel(kernel.
kernel(), cl::NullRange, gws, lws);
87 template <
unsigned int dimension_size>
88 void ICLKernel::add_tensor_argument(
unsigned &idx,
const ICLTensor *tensor,
const Window &window)
100 offset_first_element += (window.
is_broadcasted(
n) ? 0 : window[
n].start()) * strides[
n];
103 unsigned int idx_start = idx;
104 _kernel.setArg(idx++, tensor->
cl_buffer());
106 for(
unsigned int d = 0; d < dimension_size; ++d)
108 _kernel.setArg<cl_uint>(idx++, window.
is_broadcasted(d) ? 0 : strides[d]);
109 _kernel.setArg<cl_uint>(idx++, window.
is_broadcasted(d) ? 0 : (strides[d] * window[d].step()));
112 _kernel.setArg<cl_uint>(idx++, offset_first_element);
115 "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor<dimension_size>());
128 _kernel.setArg(idx++, tensor->
cl_buffer());
131 _kernel.setArg<cl_uint>(idx++, strides[1]);
132 _kernel.setArg<cl_uint>(idx++, strides[2]);
135 _kernel.setArg<cl_uint>(idx++, info->
dimension(0));
136 _kernel.setArg<cl_uint>(idx++, info->
dimension(1));
137 _kernel.setArg<cl_uint>(idx++, info->
dimension(2));
141 _kernel.setArg<cl_uint>(idx++, offset_first_element);
153 _kernel.setArg(idx++, tensor->
cl_buffer());
156 _kernel.setArg<cl_uint>(idx++, strides[1]);
157 _kernel.setArg<cl_uint>(idx++, strides[2]);
158 _kernel.setArg<cl_uint>(idx++, strides[3]);
161 _kernel.setArg<cl_uint>(idx++, info->
dimension(0));
162 _kernel.setArg<cl_uint>(idx++, info->
dimension(1));
163 _kernel.setArg<cl_uint>(idx++, info->
dimension(2));
164 _kernel.setArg<cl_uint>(idx++, info->
dimension(3));
168 _kernel.setArg<cl_uint>(idx++, offset_first_element);
171 #ifndef DOXYGEN_SKIP_THIS 172 template void ICLKernel::add_tensor_argument<1>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
173 template void ICLKernel::add_tensor_argument<2>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
174 template void ICLKernel::add_tensor_argument<3>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
175 template void ICLKernel::add_tensor_argument<4>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
185 if(_max_workgroup_size == 0)
189 return _max_workgroup_size;
194 if((window.
x().
end() - window.
x().
start()) == 0 || (window.
y().
end() - window.
y().
start()) == 0)
196 return cl::NullRange;
199 cl::NDRange gws((window.
x().
end() - window.
x().
start()) / window.
x().
step(),
void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor)
Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments by passing strides...
static cl::NDRange gws_from_window(const Window &window)
Get the global work size given an execution window.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
const Window & window() const
The maximum window the kernel can be executed on.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
unsigned int get_next_power_two(unsigned int x)
Given an integer value, this function returns the next power of two.
void add_3d_tensor_nhw_argument(unsigned int &idx, const ICLTensor *tensor)
Add the passed NHW 3D tensor's parameters to the object's kernel's arguments by passing strides...
cl::Kernel & kernel()
Returns a reference to the OpenCL kernel of this object.
constexpr int step() const
Return the step of the dimension.
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
cl_int wbsm_hint() const
Return the workgroup batch size modifier hint.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg,...)
constexpr const Dimension & z() const
Alias to access the third dimension of the window.
Common interface for all the OpenCL kernels.
Copyright (c) 2017-2021 Arm Limited.
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel...
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void end(TokenStream &in, bool &valid)
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
bool is_broadcasted(size_t dimension) const
Return whether a dimension has been broadcasted.
Strides of an item in bytes.
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for OpenCL tensor.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
size_t get_max_workgroup_size()
Get the maximum workgroup size for the device the CLKernelLibrary uses.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
void set_target(GPUTarget target)
Set the targeted GPU architecture.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr int end() const
Return the end of the dimension.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.