Compute Library
 20.02.1
CLWinogradInputTransformKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 #include "arm_compute/core/Error.h"
34 #include "arm_compute/core/Types.h"
35 #include "arm_compute/core/Utils.h"
38 
39 using namespace arm_compute;
40 
41 namespace
42 {
// Checks whether the given Winograd configuration can be handled by the input transform.
// Returns an error Status when either convolution stride differs from 1, or when
// cl_winograd_convolution_layer_supported() rejects the (output tile, kernel size, input data layout)
// combination; otherwise returns an empty Status{}.
// NOTE(review): the numbering of this listing jumps (45-46, 54, 61 and 63-64 are absent), so the
// original file may perform further checks that are not visible here - confirm against the real source.
43 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
44 {
47 
48  const PadStrideInfo conv_info = winograd_info.convolution_info;
49  const Size2D output_tile_size = winograd_info.output_tile_size;
50  const Size2D kernel_size = winograd_info.kernel_size;
    // Only unit strides are accepted, in both directions.
51  ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd input transform only supports unit strides");
52  ARM_COMPUTE_RETURN_ERROR_ON_MSG(!cl_winograd_convolution_layer_supported(output_tile_size, kernel_size, input->data_layout()), "Winograd input transform not supported");
53 
    // Both values are used above; presumably these guards silence unused-variable warnings in
    // builds where the checking macros compile to nothing - TODO confirm.
55  ARM_COMPUTE_UNUSED(output_tile_size);
56  ARM_COMPUTE_UNUSED(kernel_size);
57 
58  // Validate configured output
    // A total_size() of 0 is treated as "output not configured yet", in which case the
    // output-specific checks are skipped.
59  if(output->total_size() != 0)
60  {
        // NOTE(review): the statements of this branch are missing from the listing.
62 
65  }
66 
67  return Status{};
68 }
69 
// Builds the execution window for the kernel and declares the region of the input that is read.
// Returns a pair (status, window): the status is a RUNTIME_ERROR ("Insufficient Padding!") when
// update_window_and_padding() reports a change, and an empty Status otherwise.
// The output info is not used by the visible code.
// NOTE(review): listing line 73 is absent; a statement may be missing here.
70 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const WinogradInfo &winograd_info)
71 {
72  ARM_COMPUTE_UNUSED(output);
74 
75  bool window_changed = false;
    // Maximum window over the input with a step of 1 in the first two dimensions.
76  Window win = calculate_max_window(*input, Steps(1, 1));
77 
78  if(input->data_layout() == DataLayout::NCHW)
79  {
80  const PadStrideInfo conv_info = winograd_info.convolution_info;
81  const Size2D output_tile_size = winograd_info.output_tile_size;
82  const Size2D kernel_size = winograd_info.kernel_size;
83 
        // Each iteration reads (output tile + kernel - 1) elements along x and along y.
84  unsigned int num_elems_read_per_iteration_x = output_tile_size.width + kernel_size.width - 1;
85  unsigned int num_elems_read_per_iteration_y = output_tile_size.height + kernel_size.height - 1;
86 
        // The access rectangle is offset by the negated left/top padding.
87  AccessWindowRectangle input_access(input, -conv_info.pad_left(), -conv_info.pad_top(), num_elems_read_per_iteration_x, num_elems_read_per_iteration_y);
88  window_changed = update_window_and_padding(win, input_access);
89  }
90  else
91  {
        // Presumably (start_x, start_y, end_x, end_y): the full extent of dimension 0 and one extra
        // element before and after dimension 1 - TODO confirm against AccessWindowStatic.
92  AccessWindowStatic input_access(input, 0, -1, input->dimension(0), input->dimension(1) + 1);
93  window_changed = update_window_and_padding(win, input_access);
94  }
95 
96  Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
97  return std::make_pair(err, win);
98 }
99 } // namespace
100 
// Constructor body and member initializer list.
// NOTE(review): the signature line (listing line 101) is missing from this listing.
// Initial state: zero border, null input/output tensors, unknown data layout, zero tiles in x and y,
// and a z step of 1.
102  : _border_size(0), _input(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _num_tiles_x(0), _num_tiles_y(0), _step_z(1)
103 {
104 }
105 
// Body of an accessor returning the stored border size (_border_size).
// NOTE(review): the signature line (listing line 106) is missing; presumably this is
// `BorderSize CLWinogradInputTransformKernel::border_size() const` - confirm against the original file.
107 {
108  return _border_size;
109 }
110 
// Body of the kernel configuration routine: validates the arguments, computes the border and the
// number of tiles, assembles the OpenCL build options and kernel name, creates the kernel, configures
// the execution window and builds the tuning/config id string.
// NOTE(review): the signature (listing line 111) is missing; presumably
// `configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)`.
// Listing lines 113, 153 and 195 are missing as well, see the notes below.
112 {
114  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), winograd_info));
115 
116  const PadStrideInfo conv_info = winograd_info.convolution_info;
117  const Size2D output_tile_size = winograd_info.output_tile_size;
118  const Size2D kernel_size = winograd_info.kernel_size;
119 
120  _data_layout = input->info()->data_layout();
121 
    // Width/height positions depend on the data layout, so they are looked up rather than hard-coded.
122  const size_t idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
123  const size_t idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
124 
125  // Compute number of elements to process in the X and Y direction
    // i.e. input extent + padding on both sides - (kernel extent - 1).
126  const int num_elements_x = input->info()->dimension(idx_w) - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
127  const int num_elements_y = input->info()->dimension(idx_h) - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
128 
129  if(_data_layout == DataLayout::NCHW)
130  {
131  // Check if we need to extend the right or bottom border
        // When the element count is not a multiple of the tile size, (tile size - 1) extra border
        // elements are added on that side.
132  const unsigned int extra_border_right = ((num_elements_x % output_tile_size.width) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.width - 1);
133  const unsigned int extra_border_bottom = ((num_elements_y % output_tile_size.height) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.height - 1);
134 
        // Arguments are passed in the order top, right, bottom, left.
135  _border_size = BorderSize(conv_info.pad_top(), conv_info.pad_right() + extra_border_right, conv_info.pad_bottom() + extra_border_bottom, conv_info.pad_left());
136  }
137  else
138  {
        // Fixed border for the non-NCHW path: 1 on top and bottom, 0 on right and left.
139  _border_size = BorderSize(1U, 0U, 1U, 0);
140  }
141 
142  // Compute the number of output tiles along the x and y direction of size "output_tile_size"
143  const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input->info()->dimension(idx_w), input->info()->dimension(idx_h)),
144  kernel_size,
145  output_tile_size,
146  conv_info);
147 
148  _input = input;
149  _output = output;
150  _num_tiles_x = num_tiles.width;
151  _num_tiles_y = num_tiles.height;
152 
    // NOTE(review): listing line 153, which must define `output_shape` used below, is missing.
    // Presumably it calls compute_winograd_input_transform_shape() - confirm against the original file.
154 
155  // Output auto initialization if not yet initialized
    // The output info is derived from a clone of the input info with only the shape replaced.
156  auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
157 
    // Sanity check: dimension 1 of the output must hold exactly one entry per tile.
158  ARM_COMPUTE_ERROR_ON(_num_tiles_x * _num_tiles_y != static_cast<int>(output->info()->dimension(1)));
    // Product of the sizes of dimension 3 and above.
159  const size_t total_batches = input->info()->tensor_shape().total_size_upper(3);
160 
161  CLBuildOptions build_opts;
162  build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(_num_tiles_x));
163  build_opts.add_option("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left()));
164  build_opts.add_option("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
165  build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width));
166  build_opts.add_option("-DOUTPUT_TILE_H=" + support::cpp11::to_string(output_tile_size.height));
167  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
    // A kernel height of 1 selects the horizontal variant and a kernel width of 1 the vertical variant.
168  build_opts.add_option_if(winograd_info.kernel_size.height == 1, "-DWINOGRAD_INPUT_TRANSFORM_HORIZONTAL");
169  build_opts.add_option_if(winograd_info.kernel_size.width == 1, "-DWINOGRAD_INPUT_TRANSFORM_VERTICAL");
170  if(_data_layout == DataLayout::NHWC)
171  {
        // NUM_TILES_Y is only defined when there is more than one batch; the source dimensions 1 and 2
        // are always passed for NHWC.
172  build_opts.add_option_if(total_batches > 1, "-DNUM_TILES_Y=" + support::cpp11::to_string(_num_tiles_y));
173  build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1)));
174  build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2)));
175  }
176  else
177  {
        // For the other layout, SRC_DEPTH (dimension 2) is only defined when there is more than one batch.
178  build_opts.add_option_if(total_batches > 1, "-DSRC_DEPTH=" + support::cpp11::to_string(_input->info()->dimension(2)));
179  }
180 
181  // Create kernel
    // The kernel name is built from the output tile size and the kernel size.
182  std::string kernel_name = "winograd_input_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string();
183 
184  // Get the maximum dimension from the tile size
185  const unsigned int tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);
186 
187  // Check optimized kernel if output_dims == 2x2
    // For NCHW with a maximum tile dimension of 2, two z planes are processed per step when
    // dimension 2 of the input is even; otherwise _step_z is set to 1.
188  if((tile_max_dim == 2) && (_data_layout == DataLayout::NCHW))
189  {
190  _step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2;
191  }
192 
193  // Append stepz and data layout
194  kernel_name += "_stepz";
    // NOTE(review): listing line 195 is missing; presumably it appends the value of _step_z - confirm.
196  kernel_name += "_" + lower_string(string_from_data_layout(_data_layout));
197 
198  _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
199 
200  // Create window and update padding
201  auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info);
202  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    // Presumably the NDRange(1, 1, 8) is the local work-group size hint - TODO confirm.
203  ICLKernel::configure_internal(win_config.second, cl::NDRange(1, 1, 8));
204 
    // Config id: kernel name followed by input dimensions 0..2, left/top padding and the data layout.
    // Note that no separator is inserted between the kernel name and dimension 0.
205  _config_id = kernel_name;
206  _config_id += support::cpp11::to_string(input->info()->dimension(0));
207  _config_id += "_";
208  _config_id += support::cpp11::to_string(input->info()->dimension(1));
209  _config_id += "_";
210  _config_id += support::cpp11::to_string(input->info()->dimension(2));
211  _config_id += "_";
212  _config_id += support::cpp11::to_string(conv_info.pad_left());
213  _config_id += "_";
214  _config_id += support::cpp11::to_string(conv_info.pad_top());
215  _config_id += "_";
216  _config_id += lower_string(string_from_data_layout(_data_layout));
217 }
218 
// Body of the static validation routine: returns the first error reported by validate_arguments()
// or by validate_and_configure_window(), and an empty Status{} when both succeed.
// NOTE(review): the signature (listing line 219) and listing line 221 are missing; presumably
// `Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)`.
220 {
222  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, winograd_info));
    // The window check runs on clones of the tensor infos, presumably so that any padding update
    // does not modify the caller's objects - TODO confirm.
223  ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), winograd_info).first);
224 
225  return Status{};
226 }
227 
// Enqueues the configured OpenCL kernel on `queue`, once per 3D slice of the (collapsed) window.
// The slice is re-dimensioned so that width/height iterate over tiles and the channel dimension
// advances in steps of _step_z.
// NOTE(review): listing lines 230-231 and 239 are missing. Line 239 must define `window_collapsed`
// (presumably via window.collapse_if_possible(...)) - confirm against the original file.
228 void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue &queue)
229 {
232 
233  const size_t idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
234  const size_t idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
235  const size_t idx_c = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
    // Product of the window sizes of dimension 3 and above.
236  const size_t total_batches = window.shape().total_size_upper(3);
237 
238  // Collapse window
240 
241  Window slice = window_collapsed.first_slice_window_3D();
    // One work-item per tile along width and height.
242  slice.set(idx_w, Window::Dimension(0, _num_tiles_x, 1));
243  slice.set(idx_h, Window::Dimension(0, _num_tiles_y, 1));
244  if(_data_layout == DataLayout::NHWC)
245  {
        // For NHWC the height range is extended to cover the tiles of all batches.
246  slice.set(idx_h, Window::Dimension(0, _num_tiles_y * total_batches, 1));
247  }
248 
    // The channel range must be divisible by the z step chosen in configure().
249  ARM_COMPUTE_ERROR_ON(((slice[idx_c].end() - slice[idx_c].start()) % _step_z) != 0);
250  slice.set(idx_c, Window::Dimension(slice[idx_c].start(), slice[idx_c].end(), _step_z));
251 
    // The two stride arguments are placed right after the argument slots of the two 3D tensors
    // and are set once, outside the loop.
252  unsigned int idx = 2 * num_arguments_per_3D_tensor();
253  _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
254  _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3]));
255 
256  do
257  {
        // This `idx` shadows the outer one; the tensor arguments start again from slot 0 for every slice.
258  unsigned int idx = 0;
259  add_3D_tensor_argument(idx, _input, slice);
260  add_3D_tensor_argument(idx, _output, slice);
261 
262  enqueue(queue, *this, slice, lws_hint());
263  }
264  while(window_collapsed.slide_window_slice_3D(slice));
265 }
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:34
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
Shape of a tensor.
Definition: TensorShape.h:39
TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd input transform shape.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
BorderSize border_size() const override
The size of the border for that kernel.
Container for 2D border size.
Definition: Types.h:269
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:39
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
Static function to check if given info will lead to a valid configuration of CLWinogradInputTransformKernel.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:247
Winograd information.
Definition: Types.h:2154
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
size_t total_size_upper(size_t dimension) const
Collapses given dimension and above.
Definition: TensorShape.h:181
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:75
Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
Calculate the number of output tiles required by Winograd Convolution layer.
Definition: Helpers.h:744
Status class.
Definition: Error.h:52
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:333
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps=Steps(), bool skip_border=false, BorderSize border_size=BorderSize())
Calculate the maximum window for a given tensor shape and border setting.
Definition: Helpers.cpp:28
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:158
Copyright (c) 2017-2020 ARM Limited.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: Helpers.inl:202
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:93
1 channel, 1 F16 per channel
Implementation of a static rectangular access pattern.
void add_option(std::string option)
Adds option to the existing build option list.
Implementation of a rectangular access pattern.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: Helpers.h:402
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:200
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:288
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
std::string kernel_name
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
Set the input and output of the kernel.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
Padding and stride information class.
Definition: Types.h:686
std::unique_ptr< Kernel > create_kernel()
Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object.
Definition: Helpers.h:86
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:333
Num samples, channels, height, width.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: Utils.cpp:132
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:92
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Num samples, height, width, channels.
TensorShape shape() const
Return the shape of the window in number of steps.
Definition: Window.inl:272
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:327
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported.
Definition: CLHelpers.cpp:289
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:289
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
DataLayout
[DataLayout enum definition]
Definition: Types.h:117
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
std::string to_string() const
Definition: Size2D.h:68