41 static_assert(
sizeof(
float) ==
sizeof(int32_t),
"Float must be same size as int32_t");
44 memcpy(&int_val, &val,
sizeof(
float));
45 int_val = (int_val >= 0) ? int_val : int_val ^ 0x7FFFFFFF;
51 static_assert(
sizeof(
float) ==
sizeof(int32_t),
"Float must be same size as int32_t");
54 val = (val >= 0) ? val : val ^ 0x7FFFFFFF;
55 memcpy(&flt_val, &val,
sizeof(
float));
60 : _input(nullptr), _min_max(), _data_type_max_min()
82 _data_type_max_min[0] = UCHAR_MAX;
83 _data_type_max_min[1] = 0;
86 _data_type_max_min[0] = SHRT_MAX;
87 _data_type_max_min[1] = SHRT_MIN;
90 _data_type_max_min[0] =
FloatFlip(std::numeric_limits<float>::max());
100 if(num_elems_processed_per_iteration % max_cl_vector_width != 0)
102 build_opts.emplace(
"-DNON_MULTIPLE_OF_16");
109 build_opts.emplace(
"-DIS_DATA_TYPE_FLOAT");
118 _kernel =
create_kernel(compile_context,
"minmax", build_opts);
122 _kernel.setArg(idx++, *_min_max);
123 _kernel.setArg<cl_int>(idx++,
static_cast<cl_int
>(input->
info()->
dimension(0)));
128 ICLKernel::configure_internal(win);
137 queue.enqueueWriteBuffer(*_min_max, CL_FALSE , 0, _data_type_max_min.size() *
sizeof(int), _data_type_max_min.data());
142 unsigned int idx = 0;
150 queue.enqueueReadBuffer(*_min_max, CL_TRUE , 0 *
sizeof(cl_int),
sizeof(cl_int), static_cast<int *>(&min));
151 queue.enqueueReadBuffer(*_min_max, CL_TRUE , 1 *
sizeof(cl_int),
sizeof(cl_int), static_cast<int *>(&max));
155 std::array<float, 2> min_max =
162 queue.enqueueWriteBuffer(*_min_max, CL_TRUE , 0, min_max.size() *
sizeof(float), min_max.data());
166 std::array<int32_t, 2> min_max = { { min, max } };
167 queue.enqueueWriteBuffer(*_min_max, CL_TRUE , 0, min_max.size() *
sizeof(int32_t), min_max.data());
172 : _input(nullptr), _min_max_count(nullptr)
190 _min_max_count = min_max_count;
193 std::set<std::string> build_opts;
195 build_opts.emplace((min_max_count !=
nullptr) ?
"-DCOUNT_MIN_MAX" :
"");
196 build_opts.emplace((min_loc !=
nullptr) ?
"-DLOCATE_MIN" :
"");
197 build_opts.emplace((max_loc !=
nullptr) ?
"-DLOCATE_MAX" :
"");
200 build_opts.emplace(
"-DIS_DATA_TYPE_FLOAT");
204 _kernel =
create_kernel(compile_context,
"minmaxloc", build_opts);
208 _kernel.setArg(idx++, *min_max);
209 _kernel.setArg(idx++, *min_max_count);
210 if(min_loc !=
nullptr)
212 _kernel.setArg(idx++, min_loc->
cl_buffer());
215 if(max_loc !=
nullptr)
217 _kernel.setArg(idx++, max_loc->
cl_buffer());
225 ICLKernel::configure_internal(win);
233 static const unsigned int zero_count = 0;
234 queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 0 *
sizeof(zero_count),
sizeof(zero_count), &zero_count);
235 queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 1 *
sizeof(zero_count),
sizeof(zero_count), &zero_count);
240 unsigned int idx = 0;
Window first_slice_window_2D() const
First 2D slice of the window.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
CLMinMaxLocationKernel()
Constructor.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
1 channel, 1 U8 per channel
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Copyright (c) 2017-2021 Arm Limited.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Interface for OpenCL Array.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
void configure(const ICLImage *input, cl::Buffer *min_max)
Initialise the kernel's input and output.
void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc=nullptr, ICLCoordinates2DArray *max_loc=nullptr)
Initialise the kernel's input and outputs.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
Class to describe a number of elements in each dimension.
Implementation of a row access pattern.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
float IFloatFlip(int32_t val)
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
Interface for OpenCL tensor.
Manages the compilation and caching of all the OpenCL kernels, and provides accessors for the OpenCL Context.
size_t max_num_values() const
Maximum number of values which can be stored in this array.
unsigned int num_elems_processed_per_iteration
int32_t FloatFlip(float val)
int int_val(TokenStream &in, bool &valid)
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
CLMinMaxKernel()
Default constructor.