Compute Library
 20.11
CLMinMaxLocationKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
33 #include "support/StringSupport.h"
34 
35 #include <climits>
36 
37 namespace arm_compute
38 {
/** Reinterpret a float as a signed 32-bit integer key.
 *
 * The raw bit pattern of @p val is copied into an int32_t. Patterns that are
 * non-negative as integers are returned unchanged; negative patterns have
 * their lower 31 bits inverted (the sign bit is kept).
 * Assuming IEEE-754 binary32 floats, this makes signed integer comparison of
 * two keys agree with the comparison of the original floats (NaNs aside).
 *
 * @param[in] val Value to convert.
 *
 * @return The integer key for @p val.
 */
inline int32_t FloatFlip(float val)
{
    static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");

    // memcpy is the well-defined way to read the object representation of a float.
    int32_t bits = 0;
    memcpy(&bits, &val, sizeof(float));

    // Only negative patterns are touched: flip everything except the sign bit.
    if(bits < 0)
    {
        bits ^= 0x7FFFFFFF;
    }
    return bits;
}
48 
/** Turn a signed 32-bit integer key back into a float.
 *
 * Non-negative keys are used as-is; negative keys have their lower 31 bits
 * inverted (the sign bit is kept). The resulting bit pattern is then copied
 * into a float. The XOR step is its own inverse, so applying it twice
 * restores the original bits.
 *
 * @param[in] val Integer key to convert.
 *
 * @return The float whose bit pattern corresponds to @p val.
 */
inline float IFloatFlip(int32_t val)
{
    static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");

    // Undo the lower-31-bit inversion applied to negative patterns.
    const int32_t raw_bits = (val < 0) ? (val ^ 0x7FFFFFFF) : val;

    // memcpy is the well-defined way to write the object representation of a float.
    float result = 0.f;
    memcpy(&result, &raw_bits, sizeof(float));
    return result;
}
58 
// NOTE(review): the constructor signature (listing line 59) is missing from this
// extract; this initializer list presumably belongs to CLMinMaxKernel's default
// constructor. It null-initialises _input and value-initialises _min_max and
// _data_type_max_min.
60  : _input(nullptr), _min_max(), _data_type_max_min()
61 {
62 }
63 
64 void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
65 {
66  configure(CLKernelLibrary::get().get_compile_context(), input, min_max);
67 }
68 
// Configures the min/max kernel for `input` with the supplied compile context:
// stores the input and the result buffer, derives the per-type seed values,
// assembles the build options and creates the "minmax" OpenCL kernel.
// `min_max` must be non-null (asserted below).
// NOTE(review): listing lines 71-72 and 126-127 are absent from this extract;
// `win`, passed to configure_internal() at the end, is presumably declared there.
69 void CLMinMaxKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max)
70 {
73  ARM_COMPUTE_ERROR_ON(min_max == nullptr);
74 
75  _input = input;
76  _min_max = min_max;
// The per-iteration element count is taken from the input's dimension 0.
77  const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0);
78 
// Seed values that run() writes into the result buffer before launching the kernel:
// entry 0 starts at the type's largest value and entry 1 at its smallest.
// F32 limits are stored as FloatFlip() integer keys. Any other data type is an error.
79  switch(input->info()->data_type())
80  {
81  case DataType::U8:
82  _data_type_max_min[0] = UCHAR_MAX;
83  _data_type_max_min[1] = 0;
84  break;
85  case DataType::S16:
86  _data_type_max_min[0] = SHRT_MAX;
87  _data_type_max_min[1] = SHRT_MIN;
88  break;
89  case DataType::F32:
90  _data_type_max_min[0] = FloatFlip(std::numeric_limits<float>::max());
91  _data_type_max_min[1] = FloatFlip(std::numeric_limits<float>::lowest());
92  break;
93  default:
94  ARM_COMPUTE_ERROR("You called with the wrong image data types");
95  }
96 
97  // Set kernel build options
98  std::set<std::string> build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) };
99 
// max_cl_vector_width is defined outside this extract; the option name suggests
// a width of 16 — TODO confirm.
100  if(num_elems_processed_per_iteration % max_cl_vector_width != 0)
101  {
102  build_opts.emplace("-DNON_MULTIPLE_OF_16");
103  }
104 
// Float inputs pass the real float limits to the kernel (not the flipped integer
// keys kept in _data_type_max_min); integer inputs pass the seed values directly.
105  if(input->info()->data_type() == DataType::F32)
106  {
107  build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits<float>::max()));
108  build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(std::numeric_limits<float>::lowest()));
109  build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
110  }
111  else
112  {
113  build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(_data_type_max_min[0]));
114  build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(_data_type_max_min[1]));
115  }
116 
117  // Create kernel
118  _kernel = create_kernel(compile_context, "minmax", build_opts);
119 
120  // Set fixed arguments
// The arguments following the 2D tensor slots are the result buffer and then the
// input's dimension 0 as a cl_int.
121  unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
122  _kernel.setArg(idx++, *_min_max);
123  _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(input->info()->dimension(0)));
124 
125  // Configure kernel window
128  ICLKernel::configure_internal(win);
129 }
130 
// Enqueues the min/max kernel on `queue`, then post-processes the result buffer
// on the host so that it ends up holding {min, max} in the input's value domain.
// NOTE(review): listing lines 133-134, 139 and 146 are absent from this extract;
// the declaration of `slice` and the `while` condition closing the do-loop are
// presumably on those lines.
131 void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
132 {
135 
136  // Reset minimum and maximum values
// Non-blocking write of the seed values prepared in configure().
137  queue.enqueueWriteBuffer(*_min_max, CL_FALSE /* blocking */, 0, _data_type_max_min.size() * sizeof(int), _data_type_max_min.data());
138 
// Each pass binds the input tensor for the current slice and enqueues the kernel.
140  do
141  {
142  unsigned int idx = 0;
143  add_2D_tensor_argument(idx, _input, slice);
144  enqueue(queue, *this, slice, lws_hint());
145  }
147 
// Blocking reads fetch the two 32-bit results: element 0 into `min`, element 1 into `max`.
148  cl_int min = 0;
149  cl_int max = 0;
150  queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 0 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&min));
151  queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 1 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&max));
152 
// For F32 the values read back are converted with IFloatFlip() and the buffer is
// overwritten with the two floats; otherwise the integers are written back unchanged.
153  if(_input->info()->data_type() == DataType::F32)
154  {
155  std::array<float, 2> min_max =
156  {
157  {
158  IFloatFlip(min),
159  IFloatFlip(max)
160  }
161  };
162  queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(float), min_max.data());
163  }
164  else
165  {
166  std::array<int32_t, 2> min_max = { { min, max } };
167  queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(int32_t), min_max.data());
168  }
169 }
170 
// NOTE(review): the constructor signature (listing line 171) is missing from this
// extract; this initializer list presumably belongs to CLMinMaxLocationKernel's
// default constructor. It null-initialises _input and _min_max_count.
172  : _input(nullptr), _min_max_count(nullptr)
173 {
174 }
175 
176 void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc)
177 {
178  configure(CLKernelLibrary::get().get_compile_context(), input, min_max, min_max_count, min_loc, max_loc);
179 }
180 
// Configures the min/max location kernel with the supplied compile context:
// stores the input and counter buffer, selects the build options from the
// outputs that were requested, creates the "minmaxloc" OpenCL kernel and binds
// its static arguments.
// `min_max` must be non-null, and at least one of `min_max_count`, `min_loc`,
// `max_loc` must be non-null (both asserted below).
// NOTE(review): listing lines 184-185 and 223-224 are absent from this extract;
// `win`, passed to configure_internal() at the end, is presumably declared there.
181 void CLMinMaxLocationKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc,
182  ICLCoordinates2DArray *max_loc)
183 {
186  ARM_COMPUTE_ERROR_ON(min_max == nullptr);
187  ARM_COMPUTE_ERROR_ON(min_max_count == nullptr && min_loc == nullptr && max_loc == nullptr);
188 
189  _input = input;
190  _min_max_count = min_max_count;
191 
192  // Set kernel build options
// Each optional output enables its own define; when an output is not requested an
// empty string is inserted into the option set instead.
193  std::set<std::string> build_opts;
194  build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
195  build_opts.emplace((min_max_count != nullptr) ? "-DCOUNT_MIN_MAX" : "");
196  build_opts.emplace((min_loc != nullptr) ? "-DLOCATE_MIN" : "");
197  build_opts.emplace((max_loc != nullptr) ? "-DLOCATE_MAX" : "");
198  if(input->info()->data_type() == DataType::F32)
199  {
200  build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
201  }
202 
203  // Create kernel
204  _kernel = create_kernel(compile_context, "minmaxloc", build_opts);
205 
206  // Set static arguments
207  unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
208  _kernel.setArg(idx++, *min_max);
// NOTE(review): the check above accepts min_max_count == nullptr as long as a
// location array is supplied, yet the pointer is dereferenced unconditionally
// here — confirm that callers always pass a counter buffer.
209  _kernel.setArg(idx++, *min_max_count);
// Location arguments (buffer, then capacity) are appended only for the arrays that
// were supplied, so the argument index of max_loc depends on whether min_loc is set.
210  if(min_loc != nullptr)
211  {
212  _kernel.setArg(idx++, min_loc->cl_buffer());
213  _kernel.setArg<cl_uint>(idx++, min_loc->max_num_values());
214  }
215  if(max_loc != nullptr)
216  {
217  _kernel.setArg(idx++, max_loc->cl_buffer());
218  _kernel.setArg<cl_uint>(idx++, max_loc->max_num_values());
219  }
220 
221  // Configure kernel window
// Presumably consumed by the window set-up on the missing listing lines 223-224.
222  constexpr unsigned int num_elems_processed_per_iteration = 1;
225  ICLKernel::configure_internal(win);
226 }
227 
// Zeroes the two counters in the count buffer and enqueues the min/max location
// kernel on `queue`.
// NOTE(review): listing lines 230-231, 237 and 244 are absent from this extract;
// the declaration of `slice` and the `while` condition closing the do-loop are
// presumably on those lines.
228 void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
229 {
232 
// Non-blocking writes reset the unsigned counters at element offsets 0 and 1.
// The source value is static, so it stays valid after this function returns.
// NOTE(review): _min_max_count is dereferenced unconditionally although
// configure() appears to accept a null counter buffer — confirm.
233  static const unsigned int zero_count = 0;
234  queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 0 * sizeof(zero_count), sizeof(zero_count), &zero_count);
235  queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 1 * sizeof(zero_count), sizeof(zero_count), &zero_count);
236 
// Each pass binds the input tensor for the current slice and enqueues the kernel.
238  do
239  {
240  unsigned int idx = 0;
241  add_2D_tensor_argument(idx, _input, slice);
242  enqueue(queue, *this, slice, lws_hint());
243  }
245 }
246 } // namespace arm_compute
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
Definition: Validate.h:856
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:264
1 channel, 1 U8 per channel
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2020 Arm Limited.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
Interface for OpenCL Array.
Definition: ICLArray.h:35
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
void configure(const ICLImage *input, cl::Buffer *min_max)
Initialise the kernel's input and output.
void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc=nullptr, ICLCoordinates2DArray *max_loc=nullptr)
Initialise the kernel's input and outputs.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Definition: Utils.h:71
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Implementation of a row access pattern.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
float IFloatFlip(int32_t val)
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:194
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:136
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
size_t max_num_values() const
Maximum number of values which can be stored in this array.
Definition: IArray.h:58
unsigned int num_elems_processed_per_iteration
int32_t FloatFlip(float val)
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
CLMinMaxKernel()
Default constructor.