Compute Library
 21.11
CLL2NormalizeLayerKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 #include "arm_compute/core/Utils.h"
33 #include "src/core/CL/CLValidate.h"
36 
37 #include "support/StringSupport.h"
38 
39 namespace arm_compute
40 {
41 namespace
42 {
43 constexpr int max_input_tensor_dim = 3;
44 
45 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
46 {
47  ARM_COMPUTE_UNUSED(epsilon);
48 
49  const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
50  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output);
54  ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported");
55  ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions");
56 
57  // Reduce shape on axis
58  TensorShape sum_shape = input->tensor_shape();
59  sum_shape.set(actual_axis, 1);
60  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape);
61 
62  if(output->total_size() != 0)
63  {
67  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape());
68  }
69 
70  return Status{};
71 }
72 } // namespace
73 
75  : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12)
76 {
78 }
79 
80 void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
81 {
82  configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, axis, epsilon);
83 }
84 
85 void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
86 {
87  ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
88  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon));
89  auto padding_info = get_padding_info({ input, sum, output });
90 
91  _input = input;
92  _sum = sum;
93  _output = output;
94  _actual_axis = wrap_around(axis, max_input_tensor_dim);
95  _epsilon = epsilon;
96 
97  const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
98  const int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
99 
100  // Set build options
101  CLBuildOptions build_opts;
102  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
103  build_opts.add_option("-DVEC_SIZE_X=" + support::cpp11::to_string(vec_size_x));
104  build_opts.add_option("-DVEC_SIZE_LEFTOVER_X=" + support::cpp11::to_string(vec_size_x_leftovers));
105 
106  // Create kernel
107  std::string kernel_name;
108  unsigned int idx = 0;
109  switch(_actual_axis)
110  {
111  case 0:
112  kernel_name = "l2_normalize_x";
113  idx = num_arguments_per_2D_tensor() * 3;
114  break;
115  case 1:
116  kernel_name = "l2_normalize_y";
117  idx = num_arguments_per_2D_tensor() * 3;
118  break;
119  case 2:
120  kernel_name = "l2_normalize_z";
121  idx = num_arguments_per_3D_tensor() * 3;
122  break;
123  default:
124  ARM_COMPUTE_ERROR("Axis not supported");
125  }
126  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
127 
128  // Set epsilon argument
129  if(input->info()->data_type() == DataType::F32)
130  {
131  _kernel.setArg<cl_float>(idx, _epsilon);
132  }
133  else
134  {
135  _kernel.setArg<cl_half>(idx, _epsilon);
136  }
137 
138  // Configure kernel window
139  Window win = calculate_max_window(*input->info(), Steps(vec_size_x));
140 
141  // Output tensor auto initialization if not yet initialized
142  auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
143 
144  ICLKernel::configure_internal(win);
146 }
147 
148 Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
149 {
150  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon));
151  return Status{};
152 }
153 
154 void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue)
155 {
158 
159  Window window_sum(window);
160 
161  switch(_actual_axis)
162  {
163  case 0:
164  {
165  window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
166  Window in_slice = window.first_slice_window_2D();
167  Window sum_slice = window_sum.first_slice_window_2D();
168  do
169  {
170  unsigned int idx = 0;
171  add_2D_tensor_argument(idx, _input, in_slice);
172  add_2D_tensor_argument(idx, _sum, sum_slice);
173  add_2D_tensor_argument(idx, _output, in_slice);
174  enqueue(queue, *this, in_slice, lws_hint());
175  }
176  while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
177  }
178  break;
179  case 1:
180  {
181  window_sum.set(Window::DimY, Window::Dimension(0, 0, 0));
182  Window in_slice = window.first_slice_window_2D();
183  Window sum_slice = window_sum.first_slice_window_2D();
184  do
185  {
186  unsigned int idx = 0;
187  add_2D_tensor_argument(idx, _input, in_slice);
188  add_2D_tensor_argument(idx, _sum, sum_slice);
189  add_2D_tensor_argument(idx, _output, in_slice);
190  enqueue(queue, *this, in_slice, lws_hint());
191  }
192  while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
193  }
194  break;
195  case 2:
196  {
197  window_sum.set(Window::DimZ, Window::Dimension(0, 0, 0));
198  Window in_slice = window.first_slice_window_3D();
199  Window sum_slice = window_sum.first_slice_window_3D();
200  do
201  {
202  unsigned int idx = 0;
203  add_3D_tensor_argument(idx, _input, in_slice);
204  add_3D_tensor_argument(idx, _sum, sum_slice);
205  add_3D_tensor_argument(idx, _output, in_slice);
206  enqueue(queue, *this, in_slice, lws_hint());
207  }
208  while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
209  }
210  break;
211  default:
212  ARM_COMPUTE_ERROR("Not supported");
213  }
214 }
215 } // namespace arm_compute
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:35
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
Definition: Validate.h:490
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:318
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
Set the input and output tensors.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:284
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:214
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
void add_option(std::string option)
Adds option to the existing build option list.
T wrap_around(T x, T m)
Wrap-around a number within the range 0 <= x < m.
Definition: Helpers.h:247
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:391
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:256
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:248
Elementwise CL kernel type.
Definition: CLTypes.h:84
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:335
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
Definition: Utils.cpp:533
CLCompileContext class.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:190
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:439
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo *> infos)
Stores padding information before configuring a kernel.
Definition: Utils.cpp:518
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
Static function to check if given info will lead to a valid configuration of CLL2NormalizeLayerKernel...
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
Definition: Utils.h:1171
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
Definition: Dimensions.h:46
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:291
std::string kernel_name
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201