34 if(kernel.
kernel()() ==
nullptr)
39 for(
unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
46 cl::NDRange gws = ICLKernel::gws_from_window(window);
49 if(gws.dimensions() == 0)
55 if(use_dummy_work_items)
61 cl::NDRange valid_lws;
64 valid_lws = cl::NullRange;
71 cl::NDRange lws = cl::NullRange;
73 if((valid_lws[0] <= gws[0]) && (valid_lws[1] <= gws[1]) && (valid_lws[2] <= gws[2]))
78 if(CLKernelLibrary::get().is_wbsm_supported())
82 queue.enqueueNDRangeKernel(kernel.
kernel(), cl::NullRange, gws, lws);
87 template <
unsigned int dimension_size>
88 void ICLKernel::add_tensor_argument(
unsigned &idx,
const ICLTensor *tensor,
const Window &window)
100 offset_first_element += (window.
is_broadcasted(n) ? 0 : window[n].start()) * strides[n];
103 unsigned int idx_start = idx;
104 _kernel.setArg(idx++, tensor->
cl_buffer());
106 for(
unsigned int d = 0; d < dimension_size; ++d)
108 _kernel.setArg<cl_uint>(idx++, strides[d]);
109 _kernel.setArg<cl_uint>(idx++, strides[d] * window[d].step());
112 _kernel.setArg<cl_uint>(idx++, offset_first_element);
115 "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor<dimension_size>());
119 #ifndef DOXYGEN_SKIP_THIS 120 template void ICLKernel::add_tensor_argument<1>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
121 template void ICLKernel::add_tensor_argument<2>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
122 template void ICLKernel::add_tensor_argument<3>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
123 template void ICLKernel::add_tensor_argument<4>(
unsigned &idx,
const ICLTensor *tensor,
const Window &
window);
133 if(_max_workgroup_size == 0)
137 return _max_workgroup_size;
142 if((window.
x().
end() - window.
x().
start()) == 0 || (window.
y().
end() - window.
y().
start()) == 0)
144 return cl::NullRange;
147 cl::NDRange gws((window.
x().
end() - window.
x().
start()) / window.
x().
step(),
static cl::NDRange gws_from_window(const Window &window)
Get the global work size given an execution window.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
const Window & window() const
The maximum window the kernel can be executed on.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
unsigned int get_next_power_two(unsigned int x)
Given an integer value, this function returns the next power of two.
cl::Kernel & kernel()
Returns a reference to the OpenCL kernel of this object.
constexpr int step() const
Return the step of the dimension.
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
cl_int wbsm_hint() const
Return the workgroup batch size modifier hint.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg,...)
constexpr const Dimension & z() const
Alias to access the third dimension of the window.
Common interface for all the OpenCL kernels.
Copyright (c) 2017-2021 Arm Limited.
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel...
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void end(TokenStream &in, bool &valid)
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
bool is_broadcasted(size_t dimension) const
Return whether a dimension has been broadcasted.
Strides of an item in bytes.
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for OpenCL tensor.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
size_t get_max_workgroup_size()
Get the maximum workgroup size for the device the CLKernelLibrary uses.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
void set_target(GPUTarget target)
Set the targeted GPU architecture.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr int end() const
Return the end of the dimension.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.