Compute Library
 21.02
GCIm2ColKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
26 
27 #include "arm_compute/core/Error.h"
35 #include "arm_compute/core/Types.h"
40 #include "support/StringSupport.h"
41 
42 #include <cmath>
43 #include <tuple>
44 
45 using namespace arm_compute;
46 
47 namespace
48 {
// File-local argument check returning a Status.
// NOTE(review): the helper's signature (listing line 49) and some body lines (51-52, 57)
// are not visible in this listing; presumably this is the validation helper used by
// configure()/validate() -- confirm against the full file.
50 {
53 
54  // Checks performed when output is configured
 // The output-specific checks run only when the output reports a non-zero total size.
55  if(output->total_size() != 0)
56  {
58  }
59 
 // Reaching this point yields a default-constructed Status.
60  return Status{};
61 }
62 } // namespace
63 
// Member-initializer list and empty body of a GCIm2ColKernel constructor.
// NOTE(review): the constructor's signature line (listing line 64) is not visible here.
// Tensor pointers and the run-function pointer start as nullptr, the dimension pairs are
// value-initialised, and the per-iteration element count starts at 1.
65  : _input(nullptr), _output(nullptr), _convolved_dims(), _kernel_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr)
66 {
67 }
68 
// Configures the kernel: stores the input/output tensors, assembles the shader build
// options, selects either the generic or the reduced im2col variant, creates the GLES
// kernel and sets up the execution window.
//
// input       Source tensor; must not be null.
// output      Destination tensor; must not be null. Its valid region is set to its full shape.
// kernel_dims Kernel width/height, emitted as KERNEL_WIDTH / KERNEL_HEIGHT in the generic path.
// conv_info   Stride and padding information.
// has_bias    When true, "#define HAS_BIAS" is added to the build options.
// dilation    Dilation factors, emitted as DILATION_X / DILATION_Y in the generic path.
69 void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
70 {
71  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
72 
73  // Perform validation step
 // NOTE(review): the statement performing the validation (listing line 74) is not visible in this listing.
75 
76  _input = input;
77  _output = output;
78 
79  // Create kernel
 // Build options are collected as "#define ..." strings; the local size is fixed to 1 in X, Y and Z.
80  std::set<std::string> build_opts;
 // Any data type other than F32 selects the FP16 define.
81  std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
82  build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
83  build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
84  build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
85  build_opts.insert("#define " + dt_name);
86 
87  if(has_bias)
88  {
89  build_opts.emplace("#define HAS_BIAS");
90  }
91 
92  int stride_x = 0;
93  int stride_y = 0;
94 
95  std::tie(stride_x, stride_y) = conv_info.stride();
96  _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height);
97 
 // The reduced variant is used only when all of the following hold:
 //  - output dimension 0 equals the product of the input's first three dimensions,
 //  - the input dimensions from index 3 onwards equal the output dimensions from index 1 onwards,
 //  - both strides are 1 and there is no padding,
 //  - dilation is 1x1.
98  const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
99  && (std::equal(input->info()->tensor_shape().cbegin() + 3,
100  input->info()->tensor_shape().cend(),
101  output->info()->tensor_shape().cbegin() + 1))
102  && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding())
103  && (dilation == Size2D(1U, 1U));
104 
105  std::string kernel_name = "im2col_generic";
106  if(!run_img2col_reduced)
107  {
 // Generic path. A 1x1 kernel on F16 input additionally enables the KERNEL_1x1 define.
108  if(input->info()->data_type() == DataType::F16 && _kernel_dims == std::pair<unsigned int, unsigned int>(1, 1))
109  {
110  build_opts.emplace("#define KERNEL_1x1");
111  }
112 
113  build_opts.emplace("#define IM2COL_GENERIC");
 // Convolved output size derived from input width/height, kernel size, conv_info and dilation.
114  _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
115  kernel_dims.width, kernel_dims.height,
116  conv_info, dilation);
 // One element per iteration for F32, two for any other data type.
117  _num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 1 : 2;
118 
 // Geometry of the convolution is passed to the shader as compile-time defines.
119  build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width));
120  build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.height));
121  build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2)));
122  build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first));
123  build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second));
124  build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first));
125  build_opts.emplace("#define STRIDE_Y " + support::cpp11::to_string(conv_info.stride().second));
126  build_opts.emplace("#define PAD_LEFT " + support::cpp11::to_string(conv_info.pad_left()));
127  build_opts.emplace("#define PAD_TOP " + support::cpp11::to_string(conv_info.pad_top()));
128  build_opts.emplace("#define PAD_RIGHT " + support::cpp11::to_string(conv_info.pad_right()));
129  build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom()));
130  build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
131  build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1)));
132  build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x()));
133  build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y()));
134 
135  _run_func = &GCIm2ColKernel::run_generic;
136  }
137  else
138  {
 // Reduced path.
139  build_opts.emplace("#define IM2COL_REDUCED");
140  kernel_name = "im2col_reduced";
141 
142  if(input->info()->data_type() == DataType::F32)
143  {
 // Integer division of 4 by the element size in bytes (presumably 1 for F32 -- TODO confirm).
144  _num_elems_processed_per_iteration = 4 / input->info()->element_size();
145  }
146  else if(input->info()->data_type() == DataType::F16)
147  {
148  int input_width = input->info()->dimension(0);
149  int input_height = input->info()->dimension(1);
150 
151  build_opts.emplace("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height));
 // Pick the widest variant (8, 4 or 2 elements) whose width divides the input width evenly;
 // widths not divisible by 2 fall back to the GENERIC variant, still with 2 elements per iteration.
152  if(input_width % 8 == 0)
153  {
154  _num_elems_processed_per_iteration = 8;
155  build_opts.emplace("#define IM2COL_REDUCED_8X");
156  }
157  else if(input_width % 4 == 0)
158  {
159  _num_elems_processed_per_iteration = 4;
160  build_opts.emplace("#define IM2COL_REDUCED_4X");
161  }
162  else if(input_width % 2 == 0)
163  {
164  _num_elems_processed_per_iteration = 2;
165  build_opts.emplace("#define IM2COL_REDUCED_2X");
166  }
167  else
168  {
169  _num_elems_processed_per_iteration = 2;
170  build_opts.emplace("#define IM2COL_REDUCED_GENERIC");
171  }
172  }
 // Any other data type keeps the previously stored per-iteration element count.
173 
174  _run_func = &GCIm2ColKernel::run_reduced;
175  }
176 
177  // Create kernel
178  _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
179 
180  // Configure kernel window
181  Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
182 
 // Only F16 input updates the window and tensor padding through access patterns.
183  if(input->info()->data_type() == DataType::F16)
184  {
185  // Calculate input right and bottom border
186  const int input_width = input->info()->dimension(0);
187  const int input_height = input->info()->dimension(1);
188  int input_total_width = input->info()->padding().left + input_width + input->info()->padding().right;
 // Extra right padding needed to round the padded width up to a multiple of the per-iteration element count.
189  int input_padding_right = ceil_to_multiple(input_total_width, _num_elems_processed_per_iteration) - input_total_width;
 // Recomputed extent: width + rounding padding + existing right padding (left padding is not included here).
190  input_total_width = input_width + input_padding_right + input->info()->padding().right;
191  AccessWindowStatic input_access(input->info(), 0, 0, input_total_width, input_height);
192 
193  // Calculate output right and bottom border
194  const int output_width = output->info()->dimension(0);
195  const int output_height = output->info()->dimension(1);
196  const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width;
197  AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height);
198 
199  update_window_and_padding(win, input_access, output_access);
200  }
201 
 // The whole output tensor shape is declared valid.
202  output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
203 
204  if(!run_img2col_reduced)
205  {
206  // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension
 // NOTE(review): the statement implementing the comment above (listing line 207) is not visible in this listing.
208  }
209 
210  IGCKernel::configure(win);
211 }
212 
// Static check of whether the given tensor infos and parameters form a valid configuration.
// kernel_dims, conv_info, has_bias and dilation are explicitly marked as unused here.
// NOTE(review): listing line 219 is not visible; presumably it forwards input/output to the
// file-local argument check -- confirm against the full file.
213 Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
214 {
215  ARM_COMPUTE_UNUSED(kernel_dims);
216  ARM_COMPUTE_UNUSED(conv_info);
217  ARM_COMPUTE_UNUSED(has_bias);
218  ARM_COMPUTE_UNUSED(dilation);
 // A default-constructed Status is returned when this line is reached.
220  return Status{};
221 }
222 
// Body of the kernel's run entry point (NOTE(review): the signature line, listing line 223,
// is not visible here). Dispatches to the member function chosen during configure().
224 {
 // A null run function means configure() has not selected run_generic or run_reduced.
225  ARM_COMPUTE_ERROR_ON(_run_func == nullptr);
 // Invoke the stored pointer-to-member on this object with the caller's window.
226  (this->*_run_func)(window);
227 }
228 
// Runs the generic im2col shader: iterates over 3D slices of the (possibly collapsed)
// execution window, binding the input as a 3D tensor and the output as a 2D tensor
// for each enqueue.
//
// window  Execution window supplied by the caller.
229 void GCIm2ColKernel::run_generic(const Window &window)
230 {
 // NOTE(review): listing lines 231-232 are not visible in this listing.
233 
234  // Get initial windows
235  Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
236 
237  // Change the Z dimension's step back to 1
238  window_collapsed.set_dimension_step(Window::DimZ, 1);
239 
 // Three independent slices: the dispatch range, the input view and the output view.
240  Window slice = window_collapsed.first_slice_window_3D();
241  Window slice_in = window_collapsed.first_slice_window_3D();
242  Window slice_out = window_collapsed.first_slice_window_3D();
243 
244  // Setup slice
 // The dispatch range covers the convolved width and height with a step of 1.
245  slice.set(Window::DimX, Window::Dimension(0, static_cast<int>(_convolved_dims.first), 1));
246  slice.set(Window::DimY, Window::Dimension(0, static_cast<int>(_convolved_dims.second), 1));
247 
248  // Setup output slice
 // X advances by the per-iteration element count; Z is pinned to a single step.
249  slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration));
250  slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1));
251  slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1));
252 
253  // we need top/left pad to be included in valid region
254  if(_input->info()->data_type() == DataType::F16)
255  {
 // Re-initialises the input's TensorInfo with its current shape, channels, data type, strides
 // and total size, passing 0 as the fifth argument (presumably the offset of the first
 // element -- TODO confirm).
 // NOTE(review): the dynamic_cast result is dereferenced without a null check; this relies on
 // _input->info() actually being a TensorInfo.
256  (dynamic_cast<TensorInfo *>(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0,
257  _input->info()->total_size());
258  }
259 
260  _kernel.use();
261 
262  do
263  {
 // Argument index restarts at 0 for every slice; input uses binding point 1, output binding point 2.
264  unsigned int idx = 0;
265  add_3D_tensor_argument(idx, _input, 1, slice_in);
266  add_2D_tensor_argument(idx, _output, 2, slice_out);
 // The byte strides of dimension 3 of both tensors are passed as extra arguments.
267  _kernel.set_argument(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
268  _kernel.set_argument(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3]));
269  _kernel.update_shader_params();
270 
271  enqueue(*this, slice);
272  }
 // Because && short-circuits, the slices advance in the order slice, slice_out, slice_in;
 // the loop stops as soon as one of them cannot be advanced.
273  while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in));
274 }
275 
// Runs the reduced im2col shader: walks the 3D slices of the execution window over the
// input while walking 1D slices of a window built from the output tensor shape.
//
// window  Execution window supplied by the caller.
276 void GCIm2ColKernel::run_reduced(const Window &window)
277 {
 // NOTE(review): listing lines 278-279 are not visible in this listing.
280 
 // The output window is derived from the output tensor's shape, not from the execution window.
281  Window out_window;
282  out_window.use_tensor_dimensions(_output->info()->tensor_shape());
283 
284  Window out_slice = out_window.first_slice_window_1D();
285  Window in_slice = window.first_slice_window_3D();
286 
287  _kernel.use();
288 
289  // Run kernel
290  do
291  {
292  // Set arguments
293  unsigned int idx = 0;
294 
 // Input is bound as a 3D tensor at binding point 1, output as a 1D tensor at binding point 2.
295  add_3D_tensor_argument(idx, _input, 1, in_slice);
296  add_1D_tensor_argument(idx, _output, 2, out_slice);
 // Input dimensions 0 and 1 are passed as the trailing arguments.
297  _kernel.set_argument(idx++, _input->info()->dimension(0));
298  _kernel.set_argument(idx++, _input->info()->dimension(1));
299  _kernel.update_shader_params();
300 
 // The dispatch range is the current input slice.
301  enqueue(*this, in_slice);
302  }
 // The input slice advances first; the output slice advances only if that succeeded, and the
 // loop ends when either cannot be advanced.
303  while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
304 }
void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U))
Set the input and output of the kernel.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: IGCKernel.cpp:132
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of GCIm2ColKernel.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
void run(const Window &window) override
Enqueue the OpenGL ES shader to process the given window.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Interface for GLES Compute tensor.
Definition: IGCTensor.h:35
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
GCKernel class.
size_t x() const
Semantic accessor for width as x.
Definition: Size2D.h:74
unsigned int pad_top() const
Get the top padding.
Definition: Types.h:806
Status class.
Definition: Error.h:52
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:276
Copyright (c) 2017-2021 Arm Limited.
virtual void set_valid_region(const ValidRegion &valid_region)=0
Set the valid region of the tensor.
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:90
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
Definition: Utils.cpp:419
Implementation of a static rectangular access pattern.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w)
Definition: Validate.h:183
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Definition: Utils.h:71
Manages all the GLES kernels compilation and caching, provides accessors for the GLES Context...
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Coordinates of an item.
Definition: Coordinates.h:37
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:770
std::string kernel_name
GCIm2ColKernel()
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Definition: Types.h:801
Padding and stride information class.
Definition: Types.h:722
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
void end(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:290
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
virtual PaddingSize padding() const =0
Padding of tensor.
void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: IGCKernel.cpp:127
unsigned int left
left of the border
Definition: Types.h:378
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:335
unsigned int right
right of the border
Definition: Types.h:376
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
size_t y() const
Semantic accessor for height as y.
Definition: Size2D.h:83
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array...
Definition: Dimensions.h:255
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
Wrapper to configure the Khronos EGL and OpenGL ES C header.
static GCKernelLibrary & get()
Get the static instance of GCKernelLibrary.
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
Definition: Window.inl:167
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Definition: Dimensions.h:231
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:89
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set={}) const
Creates a kernel from the kernel library.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor's metadata.
Definition: TensorInfo.h:45
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Container for valid region of a window.
Definition: Types.h:188
unsigned int pad_bottom() const
Get the bottom padding.
Definition: Types.h:811
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
Definition: Dimensions.h:46
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:291
unsigned int pad_left() const
Get the left padding.
Definition: Types.h:796
bool slide_window_slice_1D(Window &slice) const
Slide the passed 1D window slice.
Definition: Window.h:311
Describe a multidimensional execution window.
Definition: Window.h:39
virtual size_t num_channels() const =0
The number of channels for each tensor element.
void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: IGCKernel.cpp:122
Window first_slice_window_1D() const
First 1D slice of the window.
Definition: Window.h:275
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
bool has_padding() const
Check whether this has any padding.
Definition: Types.h:823