Compute Library
 20.02.1
CLMinMaxLocationKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2019 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 
33 #include <climits>
34 
35 namespace arm_compute
36 {
/** Encode a float as a 32-bit signed integer key.
 *
 * The float's bit pattern is copied verbatim into an integer. When the
 * resulting integer is negative (sign bit set), its lower 31 bits are
 * inverted; non-negative patterns are returned unchanged. Assuming IEEE-754
 * binary32 floats, this makes signed integer comparison of two keys agree
 * with the ordering of the original (non-NaN) floats.
 *
 * @param[in] val Value to encode.
 *
 * @return The integer key for @p val.
 */
inline int32_t FloatFlip(float val)
{
    static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");

    // Reinterpret the float's bytes as an integer without violating aliasing rules
    int32_t bits = 0;
    memcpy(&bits, &val, sizeof(float));

    // Sign bit set: invert everything except the sign bit
    if(bits < 0)
    {
        bits ^= 0x7FFFFFFF;
    }
    return bits;
}
46 
/** Decode a 32-bit signed integer key back into a float.
 *
 * When @p val is negative, its lower 31 bits are inverted first; non-negative
 * values are left untouched. The resulting bit pattern is then copied verbatim
 * into a float. The bit manipulation is the same XOR applied by FloatFlip(),
 * so applying it a second time restores the original pattern.
 *
 * @param[in] val Integer key to decode.
 *
 * @return The float whose bit pattern corresponds to @p val.
 */
inline float IFloatFlip(int32_t val)
{
    static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");

    // Sign bit set: invert everything except the sign bit
    if(val < 0)
    {
        val ^= 0x7FFFFFFF;
    }

    // Reinterpret the integer's bytes as a float without violating aliasing rules
    float decoded = 0.f;
    memcpy(&decoded, &val, sizeof(float));
    return decoded;
}
56 
58  : _input(nullptr), _min_max(), _data_type_max_min()
59 {
60 }
61 
/** Prepare the "minmax" OpenCL kernel for the given image.
 *
 * Stores the input image and the output buffer, selects the pair of values
 * used to re-initialise the buffer on every run, assembles the build options,
 * creates the kernel and binds its fixed arguments.
 *
 * @param[in]  input   Source image. The switch below handles U8, S16 and F32; any other type raises an error.
 * @param[out] min_max Buffer bound as the kernel's min/max argument. Must not be nullptr.
 *
 * NOTE(review): the line numbering of this listing has gaps (64-65 and 119-120), so some
 * statements, including the declaration of `win`, are not visible here.
 */
62 void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
63 {
66  ARM_COMPUTE_ERROR_ON(min_max == nullptr);
67 
68  _input = input;
69  _min_max = min_max;
// One iteration covers the full extent of dimension 0 of the input
70  const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0);
71 
// _data_type_max_min[0] holds the largest and [1] the smallest value of the data type.
// run() uploads this pair to the min_max buffer before launching the kernel.
72  switch(input->info()->data_type())
73  {
74  case DataType::U8:
75  _data_type_max_min[0] = UCHAR_MAX;
76  _data_type_max_min[1] = 0;
77  break;
78  case DataType::S16:
79  _data_type_max_min[0] = SHRT_MAX;
80  _data_type_max_min[1] = SHRT_MIN;
81  break;
82  case DataType::F32:
// Float limits are stored as FloatFlip-encoded integers
83  _data_type_max_min[0] = FloatFlip(std::numeric_limits<float>::max());
84  _data_type_max_min[1] = FloatFlip(std::numeric_limits<float>::lowest());
85  break;
86  default:
87  ARM_COMPUTE_ERROR("You called with the wrong image data types");
88  }
89 
90  // Set kernel build options
91  std::set<std::string> build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) };
92 
// NOTE(review): max_cl_vector_width is declared outside this listing; the option name
// suggests a width of 16 - confirm.
93  if(num_elems_processed_per_iteration % max_cl_vector_width != 0)
94  {
95  build_opts.emplace("-DNON_MULTIPLE_OF_16");
96  }
97 
98  if(input->info()->data_type() == DataType::F32)
99  {
// For F32 the kernel is given the real float limits, not the encoded integers stored above
100  build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits<float>::max()));
101  build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(std::numeric_limits<float>::lowest()));
102  build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
103  }
104  else
105  {
106  build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(_data_type_max_min[0]));
107  build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(_data_type_max_min[1]));
108  }
109 
110  // Create kernel
111  _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("minmax", build_opts));
112 
113  // Set fixed arguments
114  unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
115  _kernel.setArg(idx++, *_min_max);
// Extent of dimension 0 of the input, passed as a cl_int
116  _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(input->info()->dimension(0)));
117 
118  // Configure kernel window
121  ICLKernel::configure_internal(win);
122 }
123 
/** Enqueue the min/max kernel and leave the final pair of values in the min/max buffer.
 *
 * The buffer is first re-initialised with the values chosen in configure(), the
 * kernel is enqueued inside the do-loop, and the two 32-bit results are then read
 * back on the host. For F32 inputs they are decoded with IFloatFlip() and written
 * back as floats; for other types they are written back unchanged.
 *
 * @param[in]     window Execution window.
 * @param[in,out] queue  Command queue used for the buffer transfers and the kernel launch.
 *
 * NOTE(review): the line numbering of this listing has gaps (126-127, 132 and 139), so the
 * declaration of `slice` and the loop's `while` condition are not visible here.
 */
124 void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
125 {
128 
129  // Reset minimum and maximum values
130  queue.enqueueWriteBuffer(*_min_max, CL_FALSE /* blocking */, 0, _data_type_max_min.size() * sizeof(int), _data_type_max_min.data());
131 
133  do
134  {
135  unsigned int idx = 0;
136  add_2D_tensor_argument(idx, _input, slice);
137  enqueue(queue, *this, slice, lws_hint());
138  }
140 
// Blocking reads of the two results: element 0 is the minimum, element 1 the maximum
141  cl_int min = 0;
142  cl_int max = 0;
143  queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 0 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&min));
144  queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 1 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&max));
145 
146  if(_input->info()->data_type() == DataType::F32)
147  {
// Decode the integer-encoded results and store them in the buffer as floats
148  std::array<float, 2> min_max =
149  {
150  {
151  IFloatFlip(min),
152  IFloatFlip(max)
153  }
154  };
155  queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(float), min_max.data());
156  }
157  else
158  {
// Integer types: the same two values are written back as 32-bit integers
159  std::array<int32_t, 2> min_max = { { min, max } };
160  queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(int32_t), min_max.data());
161  }
162 }
163 
165  : _input(nullptr), _min_max_count(nullptr)
166 {
167 }
168 
/** Prepare the "minmaxloc" OpenCL kernel for the given image.
 *
 * Stores the input image and the count buffer, assembles the build options
 * from the outputs that were supplied, creates the kernel and binds its
 * static arguments.
 *
 * @param[in]  input         Source image.
 * @param[in]  min_max       Buffer bound as the first kernel argument after the input tensor arguments. Must not be nullptr.
 * @param[out] min_max_count Buffer bound as the next kernel argument; enables -DCOUNT_MIN_MAX when not nullptr.
 * @param[out] min_loc       Optional coordinates array; enables -DLOCATE_MIN when not nullptr.
 * @param[out] max_loc       Optional coordinates array; enables -DLOCATE_MAX when not nullptr.
 *
 * NOTE(review): the line numbering of this listing has gaps (171-172 and 210-211), so some
 * statements, including the declaration of `win`, are not visible here.
 */
169 void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc)
170 {
173  ARM_COMPUTE_ERROR_ON(min_max == nullptr);
// Only rejects the case where all three outputs are missing
174  ARM_COMPUTE_ERROR_ON(min_max_count == nullptr && min_loc == nullptr && max_loc == nullptr);
175 
176  _input = input;
177  _min_max_count = min_max_count;
178 
179  // Set kernel build options
180  std::set<std::string> build_opts;
181  build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
// Each line below inserts an empty string into the option set when its output is absent
182  build_opts.emplace((min_max_count != nullptr) ? "-DCOUNT_MIN_MAX" : "");
183  build_opts.emplace((min_loc != nullptr) ? "-DLOCATE_MIN" : "");
184  build_opts.emplace((max_loc != nullptr) ? "-DLOCATE_MAX" : "");
185  if(input->info()->data_type() == DataType::F32)
186  {
187  build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
188  }
189 
190  // Create kernel
191  _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("minmaxloc", build_opts));
192 
193  // Set static arguments
194  unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
195  _kernel.setArg(idx++, *min_max);
// NOTE(review): min_max_count is dereferenced unconditionally here (and again in run()),
// although the check above lets it be nullptr when min_loc or max_loc is given - confirm
// whether callers always pass a count buffer.
196  _kernel.setArg(idx++, *min_max_count);
// Location outputs are bound only when supplied: the CL buffer followed by its capacity
197  if(min_loc != nullptr)
198  {
199  _kernel.setArg(idx++, min_loc->cl_buffer());
200  _kernel.setArg<cl_uint>(idx++, min_loc->max_num_values());
201  }
202  if(max_loc != nullptr)
203  {
204  _kernel.setArg(idx++, max_loc->cl_buffer());
205  _kernel.setArg<cl_uint>(idx++, max_loc->max_num_values());
206  }
207 
208  // Configure kernel window
209  constexpr unsigned int num_elems_processed_per_iteration = 1;
212  ICLKernel::configure_internal(win);
213 }
214 
/** Clear the two counters in the count buffer and enqueue the "minmaxloc" kernel.
 *
 * @param[in]     window Execution window.
 * @param[in,out] queue  Command queue used for the buffer writes and the kernel launch.
 *
 * NOTE(review): the line numbering of this listing has gaps (217-218, 224 and 231), so the
 * declaration of `slice` and the loop's `while` condition are not visible here.
 */
215 void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
216 {
219 
// Write zero to the first two unsigned int slots of the count buffer (non-blocking).
// presumably `static` so the host value stays alive until the non-blocking writes complete - confirm
220  static const unsigned int zero_count = 0;
221  queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 0 * sizeof(zero_count), sizeof(zero_count), &zero_count);
222  queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 1 * sizeof(zero_count), sizeof(zero_count), &zero_count);
223 
225  do
226  {
// Bind the input tensor arguments for the current slice and launch the kernel on it
227  unsigned int idx = 0;
228  add_2D_tensor_argument(idx, _input, slice);
229  enqueue(queue, *this, slice, lws_hint());
230  }
232 }
233 } // namespace arm_compute
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:281
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:39
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:247
1 channel, 1 U8 per channel
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps=Steps(), bool skip_border=false, BorderSize border_size=BorderSize())
Calculate the maximum window for a given tensor shape and border setting.
Definition: Helpers.cpp:28
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:321
Copyright (c) 2017-2020 ARM Limited.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
Definition: Validate.h:856
Interface for OpenCL Array.
Definition: ICLArray.h:35
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: Helpers.h:402
void configure(const ICLImage *input, cl::Buffer *min_max)
Initialise the kernel's input and output.
void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc=nullptr, ICLCoordinates2DArray *max_loc=nullptr)
Initialise the kernel's input and outputs.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
Definition: Utils.h:66
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Implementation of a row access pattern.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
float IFloatFlip(int32_t val)
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:192
std::unique_ptr< Kernel > create_kernel()
Helper function to create and return a unique_ptr pointing to a CL/GLES kernel object.
Definition: Helpers.h:86
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:134
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
size_t max_num_values() const
Maximum number of values which can be stored in this array.
Definition: IArray.h:58
unsigned int num_elems_processed_per_iteration
int32_t FloatFlip(float val)
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
CLMinMaxKernel()
Default constructor.