Compute Library
 21.11
CLRemapKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
34 
35 namespace arm_compute
36 {
// NOTE(review): constructor fragment. The signature line (listing line 37) and the single body
// line (listing line 40) are absent from this extract; presumably this is the CLRemapKernel
// default constructor — confirm against the original file.
38  : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr), _data_layout(DataLayout::NCHW) // all four tensor pointers start null; layout defaults to NCHW
39 {
41 }
42 
// NOTE(review): body of a function whose signature line (listing line 43) is absent from this
// extract; presumably CLRemapKernel::border_size() const — confirm against the original file.
44 {
45  return _data_layout == DataLayout::NCHW ? BorderSize(1) : BorderSize(0); // BorderSize(1) when the stored layout is NCHW, BorderSize(0) for any other layout
46 }
47 
// Reads the constant border value out of `constant_border_value` as type T and registers it as a
// kernel argument through ICLKernel::add_argument<T>.
//
// T                      Type the pixel value is read as (configure() instantiates uint8_t and half).
// idx                    Kernel argument index to use. Taken by value, so the caller's own index
//                        variable is not advanced by this call.
// constant_border_value  Pixel value holding the border constant.
48 template <class T>
49 void CLRemapKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
50 {
51  T value; // filled in by PixelValue::get() on the next line
52  constant_border_value.get(value);
53  ICLKernel::add_argument<T>(idx, static_cast<T>(value)); // value is already of type T, so the cast does not change it
54 }
55 
// NOTE(review): body of a validation routine whose signature line (listing line 56) is absent
// from this extract; presumably CLRemapKernel::validate(input, map_x, map_y, output, info) —
// confirm against the original file. Listing lines 61, 65, 68, 69 and 71 are also missing, so
// the checks visible below are not the complete set.
57 {
58  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, map_x, map_y, output); // all four tensor infos must be non-null
59  if(input->data_layout() == DataLayout::NCHW)
60  {
 // NCHW-specific check (listing line 61) is not present in this extract.
62  }
63  else
64  {
 // Check for the other layouts (listing line 65) is not present in this extract.
66  }
67  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() != output->data_type(), "Input/output have different data types"); // input and output must share one data type
70  ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.policy == InterpolationPolicy::AREA, "Area interpolation is not supported!"); // AREA interpolation is rejected
72  return Status{}; // default-constructed Status; presumably the "no error" value — confirm
73 }
74 
// Configures the remap kernel: validates the arguments, stores the tensors and data layout,
// builds the OpenCL kernel, sets up the execution window (plus input padding for NCHW) and
// sets the static kernel arguments that follow the tensor arguments.
//
// compile_context  Compile context passed to create_kernel().
// input            Source tensor; its data type and layout select the kernel variant.
// map_x, map_y     Map tensors (stored and later passed to the kernel by run()).
// output           Destination tensor; the execution window is computed from its info.
// info             Remap settings; policy, border_mode and constant_border_value are read here.
//
// NOTE(review): listing lines 100, 101 and 115 are absent from this extract, so the
// declarations of idx_height, idx_width and output_access used below are not visible.
75 void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info)
76 {
77  ARM_COMPUTE_ERROR_ON_NULLPTR(input, map_x, map_y, output);
78  ARM_COMPUTE_ERROR_THROW_ON(CLRemapKernel::validate(input->info(), map_x->info(), map_y->info(), output->info(), info)); // run the static validation before any state is stored
79 
80  _input = input;
81  _output = output;
82  _map_x = map_x;
83  _map_y = map_y;
84  _data_layout = input->info()->data_layout(); // the layout is taken from the input tensor
85 
86  const bool is_nhwc = _data_layout == DataLayout::NHWC;
87  const bool is_constant_border = info.border_mode == BorderMode::CONSTANT;
88 
89  // Create kernel
90  CLBuildOptions build_opts;
91  build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
92  build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); // NHWC only: output dimension 2 is passed as DEPTH_OUT
93  build_opts.add_option_if(is_constant_border, "-DCONSTANT_BORDER");
94 
95  const std::string interpolation_name = lower_string(string_from_interpolation_policy(info.policy));
96  const std::string kernel_name = "remap_" + interpolation_name + "_" + lower_string(string_from_data_layout(_data_layout)); // "remap_<policy>_<layout>", both parts lower-cased
97  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
98 
99  const unsigned int num_elems_processed_per_iteration = is_nhwc ? 1 : 4; // window step: 1 for NHWC, 4 otherwise
 // NOTE(review): idx_height / idx_width are declared on listing lines 100-101, which are missing here.
102  const int input_height = input->info()->dimension(idx_height);
103  const int input_width = input->info()->dimension(idx_width);
104 
105  // Configure window
106  Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration));
107 
108  // Update padding in NCHW case
109  if(_data_layout == DataLayout::NCHW)
110  {
111  const int total_right = ceil_to_multiple(input_width, num_elems_processed_per_iteration); // input width rounded up to a multiple of the step
112  const int access_right = total_right + (((total_right - input_width) == 0) ? border_size().right : 0); // the right border is added only when the width is already a multiple of the step
113  AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input_height + border_size().bottom);
114 
 // NOTE(review): output_access is declared on listing line 115, which is missing here.
116 
117  update_window_and_padding(win, input_access, output_access);
118  }
119 
120  ICLKernel::configure_internal(win);
121 
122  // Set static arguments
123  unsigned int idx = 4 * (is_nhwc ? num_arguments_per_4D_tensor() : num_arguments_per_2D_tensor()); // skip the argument slots of the four tensors that run() adds (input, output, map_x, map_y)
124  _kernel.setArg<cl_float>(idx++, input_width); // integer width is passed to the kernel as cl_float
125  _kernel.setArg<cl_float>(idx++, input_height); // integer height is passed to the kernel as cl_float
126  if(is_nhwc && is_constant_border) // the constant border value argument is only set for NHWC with a constant border
127  {
128  switch(input->info()->data_type())
129  {
130  case DataType::U8:
131  set_constant_border<uint8_t>(idx, info.constant_border_value);
132  break;
133  case DataType::F16:
134  static_assert(sizeof(cl_half) == sizeof(half), "Half must be same size as cl_half");
135  static_assert(sizeof(cl_half) == 2, "Half must be 16 bit");
136  set_constant_border<half>(idx, info.constant_border_value);
137  break;
138  default:
139  ARM_COMPUTE_ERROR("Data Type not handled"); // any data type other than U8 / F16 is an error on this path
140  }
141  }
142 }
143 
// Enqueues the configured kernel on `queue` for the given execution `window`.
// For NCHW the window is walked as a sequence of 2D slices with one enqueue per slice; for NHWC
// the window is collapsed from DimZ and enqueued once using its first 4D slice. In both cases
// the tensor arguments are added in the order input, output, map_x, map_y, starting at index 0.
//
// NOTE(review): listing lines 146, 147 and 152 are absent from this extract; the declaration
// of `slice` used in the NCHW branch (listing line 152) is therefore not visible.
144 void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue)
145 {
148  switch(_data_layout)
149  {
150  case DataLayout::NCHW:
151  {
153  do
154  {
155  unsigned int idx = 0; // the argument index restarts at 0 for every slice
156  add_2D_tensor_argument(idx, _input, slice);
157  add_2D_tensor_argument(idx, _output, slice);
158  add_2D_tensor_argument(idx, _map_x, slice);
159  add_2D_tensor_argument(idx, _map_y, slice);
160  enqueue(queue, *this, slice, lws_hint());
161 
162  }
163  while(window.slide_window_slice_2D(slice)); // repeat until slide_window_slice_2D() returns false
164  break;
165  }
166  case DataLayout::NHWC:
167  {
168  Window collapsed = window.collapse(ICLKernel::window(), Window::DimZ); // collapse dimensions from DimZ upwards, relative to the kernel's full window
169  Window slice = collapsed.first_slice_window_4D();
170 
171  unsigned int idx = 0;
172  add_4D_tensor_argument(idx, _input, slice);
173  add_4D_tensor_argument(idx, _output, slice);
174  add_4D_tensor_argument(idx, _map_x, slice);
175  add_4D_tensor_argument(idx, _map_y, slice);
176  enqueue(queue, *this, slice, lws_hint()); // single enqueue; there is no slice loop in this branch
177  break;
178  }
179  default:
180  ARM_COMPUTE_ERROR("Invalid Data layout"); // any layout other than NCHW / NHWC is an error
181  }
182 }
183 } // namespace arm_compute
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
unsigned int top
top of the border
Definition: Types.h:377
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info)
Initialize the kernel's input, output and border mode.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
Definition: Types.h:269
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
static Status validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, const ITensorInfo *output, RemapInfo info)
Checks if the kernel's input, output and border mode will lead to a valid configuration of CLRemapKernel.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:318
1 channel, 1 U8 per channel
void get(uint8_t &v) const
Interpret the pixel value as a U8.
Definition: PixelValue.h:244
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
half_float::half half
16-bit floating point type
Definition: Types.h:48
1 channel, 1 F32 per channel
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
unsigned int bottom
bottom of the border
Definition: Types.h:379
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Status class.
Definition: Error.h:52
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:326
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
Implementation of a static rectangular access pattern.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
void add_option(std::string option)
Adds option to the existing build option list.
Window collapse(const Window &full_window, size_t first, size_t last=Coordinates::num_max_dimensions) const
Collapse the dimensions between first and last.
Definition: Window.inl:111
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:391
const size_t input_width
InterpolationPolicy policy
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
Definition: Utils.h:71
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Implementation of a row access pattern.
const std::string & string_from_interpolation_policy(InterpolationPolicy policy)
Translates a given interpolation policy to a string.
Definition: Utils.cpp:187
unsigned int num_elems_processed_per_iteration
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:248
static constexpr unsigned int num_arguments_per_4D_tensor()
Returns the number of arguments enqueued per 4D tensor object.
Definition: ICLKernel.h:264
unsigned int left
left of the border
Definition: Types.h:380
Elementwise CL kernel type.
Definition: CLTypes.h:84
unsigned int right
right of the border
Definition: Types.h:378
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
Output values are determined by averaging the source pixels whose areas fall under the area of the de...
Num samples, channels, height, width.
CLRemapKernel()
Default constructor.
CLCompileContext class.
void set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
Function to set the constant value on fill border kernel depending on type.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:190
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: Utils.cpp:123
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
Borders are left undefined.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
Num samples, height, width, channels.
const size_t input_height
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
Window first_slice_window_4D() const
First 4D slice of the window.
Definition: Window.h:299
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
std::string kernel_name
BorderSize border_size() const override
The size of the border for that kernel.
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:224
DataLayout
[DataLayout enum definition]
Definition: Types.h:113
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.