24 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION 36 namespace experimental
38 namespace dynamic_fusion
54 IClKernel::configure_internal(cl_code.
window);
63 inline void ClCompositeKernel::add_tensor_argument(
unsigned int &idx,
const ClKernelArgDescriptor &arg,
const ICLTensor *tensor,
const Window &arg_slice, std::vector<cl::Image2D> &cl_images)
75 add_1D_tensor_argument(idx, tensor, arg_slice);
81 add_2D_tensor_argument(idx, tensor, arg_slice);
86 add_2D_tensor_argument(idx, tensor, arg_slice);
88 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(total_cross_plane_pad));
96 cl_images.push_back(tensor_image2d);
97 _kernel.setArg(idx++, tensor_image2d);
103 add_2D_tensor_argument(idx, tensor, arg_slice);
104 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(tensor->
info()->
strides_in_bytes()[2]));
112 cl_images.push_back(tensor_image2d);
113 _kernel.setArg(idx++, tensor_image2d);
114 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(tensor->
info()->
strides_in_bytes()[2]));
120 add_3D_tensor_argument(idx, tensor, arg_slice);
126 add_4D_tensor_argument(idx, tensor, arg_slice);
131 add_4d_tensor_nhwc_argument(idx, tensor);
142 cl_images.push_back(tensor_image2d);
144 _kernel.setArg(idx++, tensor_image2d);
145 add_4d_tensor_nhwc_argument(idx, tensor);
168 unsigned int idx = 0;
174 std::vector<cl::Image2D> cl_images;
175 for(
auto id_arg : _arguments)
177 const auto arg = id_arg.second;
178 auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.
get_tensor(arg.
arg_id));
185 arg_slice = slice_fixed_z;
187 add_tensor_argument(idx, arg, tensor, arg_slice, cl_images);
191 bool use_dummy_work_items =
false;
192 enqueue(queue, *
this, slice, lws_hint(), use_dummy_work_items);
std::string name
Kernel name.
unsigned int top
top of the border
const Window & window() const
The maximum window the kernel can be executed on.
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void configure(const opencl::ClCompileContext &, const ClKernelCode &)
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Window window
Execution window.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
size_t total_size_upper(size_t dimension) const
Collapses given dimension and above.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Describe one of the image's dimensions with a start, end and step.
unsigned int bottom
bottom of the border
Copyright (c) 2017-2022 Arm Limited.
Describes all the info required to add a kernel argument at run time.
const std::string & kernel_path() const
Gets the path that the kernels reside in.
Contains kernel code to be compiled and run in a ClUnitWorkload.
Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
Descriptor containing information required to run a single ClWorkload.
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
ClKernelTensorArgType tensor_arg_type
tensor argument type
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::string code
Kernel source code.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
virtual PaddingSize padding() const =0
Padding of tensor.
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
std::string config_id
Generated from blueprint based on complex component.
CLBuildOptions build_options
Kernel build options.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ITensor * get_tensor(int id)
Get the tensor with the given id from the pack.
Interface for OpenCL tensor.
bool skip_sliding_window
Skip sliding window slices during execution loop.
virtual void run_composite_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue, const ClExecutionDescriptor &exec_desc) override
Run the composite kernel.
ClKernelArgList arguments
Kernel argument descriptors.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
Create a cl::Image2D object from an OpenCL buffer.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Window first_slice_window_3D() const
First 3D slice of the window.
int arg_id
Arg ID in the blueprint, -1 means empty / uninitialized.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)