Compute Library 22.05 — CpuWinogradConv2d.cpp
/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/cpu/operators/CpuWinogradConv2d.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/common/utils/Log.h"
#include "src/core/CPP/Validate.h"
#include "src/core/NEON/kernels/convolution/common/utils.hpp"
#include "src/core/NEON/kernels/convolution/winograd/winograd.hpp"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/cpu/kernels/CpuWinogradConv2dKernel.h"
#include "src/cpu/operators/CpuActivation.h"
#include "src/cpu/operators/CpuPermute.h"
#include "src/cpu/utils/CpuAuxTensorHandler.h"

#include "support/Cast.h"

#include <set>

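// CpuWinogradConv2d implements convolution with the Winograd algorithm in three stages,
// wired together in run() below: transform the input (and, once, in prepare(), the
// weights) into the Winograd domain, multiply the transformed tensors with a batched
// GEMM (one sub-GEMM per transformed-domain point), and transform the accumulator back
// to the spatial domain, folding in the bias and, where possible, the activation.
// NCHW tensors are permuted to and from NHWC, the layout the Winograd kernels expect.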
namespace arm_compute
{
namespace cpu
{
using namespace arm_compute::experimental;
using namespace arm_compute::utils::cast;

namespace
{
arm_gemm::Activation arm_gemm_activation_from_acl_activation(const ActivationLayerInfo &act_info)
{
    switch(act_info.activation())
    {
        case ActivationLayerInfo::ActivationFunction::RELU:
        {
            return arm_gemm::Activation(arm_gemm::Activation::Type::ReLU, act_info.a(), act_info.b());
        }
        case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
        {
            return arm_gemm::Activation(arm_gemm::Activation::Type::BoundedReLU, act_info.a(), act_info.b());
        }
        default:
        {
            return arm_gemm::Activation(arm_gemm::Activation::Type::None);
        }
    }
}

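// The validate_kernel_* helpers below check the input, weights and output transform
// kernels for one Winograd configuration each, mirroring the kernel selection that
// configure() performs.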
inline Status validate_kernel_3x3(const Size2D input_dims, const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);

    if(src->data_type() == DataType::F32)
    {
        if(input_dims.width > 4 && input_dims.height > 4)
        {
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 4, 4, 3, 3>::validate(src, input0, winograd_info)));
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 4, 4, 3, 3>::validate(input1, weights, winograd_info)));
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 4, 4, 3, 3>::validate(batched_mm_output, biases, dst, winograd_info)));
        }
        else
        {
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 2, 2, 3, 3>::validate(src, input0, winograd_info)));
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 2, 2, 3, 3>::validate(input1, weights, winograd_info)));
            ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 2, 2, 3, 3>::validate(batched_mm_output, biases, dst, winograd_info)));
        }
    }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    else if(src->data_type() == DataType::F16)
    {
        ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<__fp16, 4, 4, 3, 3>::validate(src, input0, winograd_info)));
        ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<__fp16, 4, 4, 3, 3>::validate(input1, weights, winograd_info)));
        ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<__fp16, 4, 4, 3, 3>::validate(batched_mm_output, biases, dst, winograd_info)));
    }
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_5x5(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 2, 2, 5, 5>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 2, 2, 5, 5>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 2, 2, 5, 5>::validate(batched_mm_output, biases, dst, winograd_info)));
    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_3x1(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 1, 6, 1, 3>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 1, 6, 1, 3>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 1, 6, 1, 3>::validate(batched_mm_output, biases, dst, winograd_info)));
    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_1x3(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 6, 1, 3, 1>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 6, 1, 3, 1>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 6, 1, 3, 1>::validate(batched_mm_output, biases, dst, winograd_info)));

    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_5x1(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 1, 4, 1, 5>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 1, 4, 1, 5>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 1, 4, 1, 5>::validate(batched_mm_output, biases, dst, winograd_info)));
    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}
inline Status validate_kernel_1x5(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 4, 1, 5, 1>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 4, 1, 5, 1>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 4, 1, 5, 1>::validate(batched_mm_output, biases, dst, winograd_info)));
    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_7x1(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 1, 2, 1, 7>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 1, 2, 1, 7>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 1, 2, 1, 7>::validate(batched_mm_output, biases, dst, winograd_info)));
    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

inline Status validate_kernel_1x7(const ITensorInfo *src, const TensorInfo *input0, const TensorInfo *input1, const TensorInfo *batched_mm_output,
                                  const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformInputKernel<float, 2, 1, 7, 1>::validate(src, input0, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformWeightsKernel<float, 2, 1, 7, 1>::validate(input1, weights, winograd_info)));
    ARM_COMPUTE_RETURN_ON_ERROR((CpuWinogradConv2dTransformOutputKernel<float, 2, 1, 7, 1>::validate(batched_mm_output, biases, dst, winograd_info)));

    if(act_info.enabled())
    {
        CpuActivation::validate(dst, nullptr, act_info);
    }
    return Status{};
}

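// Returns the input shape in NHWC order (batches, rows, cols, channels) irrespective of
// the tensor's data layout, as expected by the Winograd transform kernels.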
inline Tensor4DShape internal_get_input_shape(const ITensorInfo *src)
{
    const DataLayout data_layout = src->data_layout();
    const int        in_width    = src->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH));
    const int        in_height   = src->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT));
    const int        in_channels = src->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL));
    const int        in_batches  = src->dimension(3);

    return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
}

Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info)
{
    ARM_COMPUTE_UNUSED(dst);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd layer only supports unit strides.");
    if(biases != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }
    return Status{};
}
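// Select the output tile F(m) for a given kernel size r: Winograd F(m, r) reads
// (m + r - 1) x (m + r - 1) input tiles, so a larger tile amortises the transform cost
// over more outputs at the expense of numerical accuracy. For example, 3x3 kernels use
// F(4x4, 3x3) on large inputs and fall back to F(2x2, 3x3) on small ones.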
Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataType data_type)
{
    Size2D output_tile = Size2D{};
    if(kernel_dims == Size2D(3U, 3U))
    {
        output_tile = (input_dims.width <= 4 || input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
        if(data_type == DataType::F16)
        {
            output_tile = Size2D(4U, 4U);
        }
    }
    else if(kernel_dims == Size2D(5U, 5U))
    {
        output_tile = Size2D(2U, 2U);
    }
    else if(kernel_dims == Size2D(1U, 3U))
    {
        output_tile = Size2D(1U, 6U);
    }
    else if(kernel_dims == Size2D(3U, 1U))
    {
        output_tile = Size2D(6U, 1U);
    }
    else if(kernel_dims == Size2D(1U, 5U))
    {
        output_tile = Size2D(1U, 4U);
    }
    else if(kernel_dims == Size2D(5U, 1U))
    {
        output_tile = Size2D(4U, 1U);
    }
    else if(kernel_dims == Size2D(7U, 1U))
    {
        output_tile = Size2D(2U, 1U);
    }
    else if(kernel_dims == Size2D(1U, 7U))
    {
        output_tile = Size2D(1U, 2U);
    }
    return output_tile;
}

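// Configurations listed here are only selected when the user opts in with
// enable_fast_math; their larger tiles/kernels are understood to trade numerical
// accuracy for speed.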
bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size, DataType data_type)
{
    // Check if the requested Winograd configuration is one that requires fast math
    using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;

    const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
    {
        WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
    };

    const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
    {
        WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
        WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
    };

    auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
                            std::pair<int, int>(kernel_size.width, kernel_size.height));

    switch(data_type)
    {
        case DataType::F16:
            return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
        case DataType::F32:
            return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
        default:
            return false;
    }
}

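// Only (bounded) ReLU can be folded into the output transform; any other activation is
// run as a separate CpuActivation pass after the transform (see _run_activation below).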
inline bool fuse_function_supported(const ActivationLayerInfo &act_info)
{
    return act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU || act_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU;
}

} // namespace

CpuWinogradConv2d::CpuWinogradConv2d()
    : _gemm_function(std::make_unique<CpuGemm>()),
      _activation_func(std::make_unique<CpuActivation>()),
      _permute_input(std::make_unique<CpuPermute>()),
      _permute_output(std::make_unique<CpuPermute>()),
      _permute_weights(std::make_unique<CpuPermute>()),
      _transform_input_kernel(nullptr),
      _transform_weights_kernel(nullptr),
      _transform_output_kernel(nullptr),
      _data_layout(),
      _aux_mem(AuxTensorIdx::Count),
      _input_nhwc(),
      _output_nhwc(),
      _input_workspace(),
      _kernel_storage(),
      _output_workspace(),
      _input_transformed(),
      _output_transformed(),
      _weights_hwio(),
      _run_activation(false),
      _is_prepared(false)
{
}

CpuWinogradConv2d::~CpuWinogradConv2d() = default;

void CpuWinogradConv2d::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst,
                                  const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info, enable_fast_math);

    // Get indices for the width and height
    _data_layout                   = src->data_layout();
    const unsigned int width_idx   = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
    const unsigned int height_idx  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
    const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);

    const Size2D   input_dims  = Size2D(src->dimension(width_idx), src->dimension(height_idx));
    const Size2D   kernel_size = Size2D(weights->dimension(width_idx), weights->dimension(height_idx));
    const DataType data_type   = src->data_type();
    const Size2D   output_tile = winograd_output_tile(input_dims, kernel_size, data_type);

    // Check if the Winograd configuration requires fast math
    if(!enable_fast_math)
    {
        ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
                                 "This Winograd configuration requires enable_fast_math=true");
    }

    _is_prepared = false;

    std::unique_ptr<ICpuWinogradConv2dTransformInputKernel>   transform_input_kernel;
    std::unique_ptr<ICpuWinogradConv2dTransformWeightsKernel> transform_weights_kernel;
    std::unique_ptr<ICpuWinogradConv2dTransformOutputKernel>  transform_output_kernel;

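    // Each branch instantiates the transform kernels for one Winograd configuration through
    // a config alias of the form
    // CpuWinogradConv2dConfiguration<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>.
    // N_GEMMS is the number of transformed-domain points,
    // (OutputTileRows + KernelRows - 1) * (OutputTileCols + KernelCols - 1),
    // e.g. 36 for F(4x4, 3x3) and 16 for F(2x2, 3x3).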
    int n_gemms = 1;
    int N_BLOCK = 1; // Size of block used by GEMM.
    if(data_type == DataType::F32)
    {
        if(kernel_size == Size2D(3, 3))
        {
            if(src->dimension(width_idx) > 4 && src->dimension(height_idx) > 4)
            {
                using config             = CpuWinogradConv2dConfiguration<float, float, 4, 4, 3, 3>;
                transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
                transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
                transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
                n_gemms                  = config::WinogradBase::N_GEMMS;
                N_BLOCK                  = config::WinogradConv::N_BLOCK;
            }
            else
            {
                using config             = CpuWinogradConv2dConfiguration<float, float, 2, 2, 3, 3>;
                transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
                transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
                transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
                n_gemms                  = config::WinogradBase::N_GEMMS;
                N_BLOCK                  = config::WinogradConv::N_BLOCK;
            }
        }
        else if(kernel_size == Size2D(5, 5))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 2, 2, 5, 5>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(1, 3))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 6, 1, 3, 1>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(3, 1))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 1, 6, 1, 3>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(1, 5))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 4, 1, 5, 1>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(5, 1))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 1, 4, 1, 5>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(1, 7))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 2, 1, 7, 1>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else if(kernel_size == Size2D(7, 1))
        {
            using config             = CpuWinogradConv2dConfiguration<float, float, 1, 2, 1, 7>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else
        {
            ARM_COMPUTE_ERROR("Not supported.");
        }
    }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    else if(data_type == DataType::F16)
    {
        if(kernel_size == Size2D(3, 3))
        {
            using config             = CpuWinogradConv2dConfiguration<__fp16, __fp16, 4, 4, 3, 3>;
            transform_input_kernel   = std::make_unique<config::TransformInputKernel>();
            transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
            transform_output_kernel  = std::make_unique<config::TransformOutputKernel>();
            n_gemms                  = config::WinogradBase::N_GEMMS;
            N_BLOCK                  = config::WinogradConv::N_BLOCK;
        }
        else
        {
            ARM_COMPUTE_ERROR("Not supported.");
        }
    }
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    else
    {
        ARM_COMPUTE_ERROR("Not supported.");
    }

    const PaddingType use_padding_type = (conv_info.pad_top() != 0u || conv_info.pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
    const bool        use_same_padding = use_padding_type == PADDING_SAME;

    // Get convolved dimensions
    const int in_channels  = src->dimension(channel_idx);
    const int out_channels = dst->dimension(channel_idx);

    const Tensor4DShape in_shape(internal_get_input_shape(src));
    const size_t        data_type_size = src->element_size();
    // Get the memory required to instantiate a new Winograd operator.
    constexpr size_t storage_alignment = 64;

    // Kernel storage
    const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels, in_channels) * data_type_size;

    // Input storage
    const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols, use_same_padding) * data_type_size;

    // Output storage
    const size_t output_storage_size  = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
    const int    kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
    const int    output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
    const auto   output_shape         = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
    const int    input_matrix_stride  = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);

    // Configure GEMM
    const int tile_rows                = iceildiv(output_shape.first, output_tile.height);
    const int tile_cols                = iceildiv(output_shape.second, output_tile.width);
    const int m                        = in_shape.n_batches * tile_rows * tile_cols;
    const int k                        = in_shape.n_channels;
    const int n                        = out_channels;
    const int kernel_matrix_row_stride = roundup(out_channels, N_BLOCK);
    const int output_matrix_row_stride = kernel_matrix_row_stride;

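    // All n_gemms sub-GEMMs share the same shapes: A is m x k (one row per output tile,
    // one column per input channel), B is k x n (transformed weights) and D is m x n.
    // For example, an output of 112x112 with F(4x4, 3x3) gives
    // iceildiv(112, 4)^2 = 784 tiles per batch, so m = batches * 784, k = in_channels
    // and n = out_channels.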
    TensorShape a_shape(k, m, 1, n_gemms);
    Strides     a_strides(data_type_size);
    a_strides.set(1, a_strides[0] * k);
    //a_strides.set(2, data_type_size * input_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
    a_strides.set(2, 0);
    a_strides.set(3, data_type_size * input_matrix_stride);

    TensorShape b_shape(n, k, n_gemms);
    Strides     b_strides(data_type_size);
    b_strides.set(1, data_type_size * kernel_matrix_row_stride);
    b_strides.set(2, data_type_size * kernel_matrix_stride);

    TensorShape d_shape(n, m, 1, n_gemms);
    Strides     d_strides(data_type_size);
    d_strides.set(1, data_type_size * output_matrix_row_stride);
    //d_strides.set(2, data_type_size * output_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
    d_strides.set(2, 0);
    d_strides.set(3, data_type_size * output_matrix_stride);

    TensorInfo a_info{};
    TensorInfo b_info{};
    TensorInfo d_info{};
    a_info.init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
    b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
    d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);

    _input_transformed  = a_info;
    _kernel_storage     = b_info;
    _output_transformed = d_info;

    const ITensorInfo *input_to_use  = src;
    ITensorInfo       *output_to_use = dst;
    PermutationVector  weights_permutation_vector(3U, 0U, 1U, 2U);
    const unsigned int max_num_threads = NEScheduler::get().num_threads();

    // Configure a permute function to re-arrange the input tensor from NCHW to NHWC
    if(_data_layout == DataLayout::NCHW)
    {
        _permute_input->configure(src, &_input_nhwc, PermutationVector(2U, 0U, 1U));
        input_to_use               = &_input_nhwc;
        weights_permutation_vector = PermutationVector(3U, 2U, 0U, 1U);
    }

    // Configure input transform kernel
    transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
                                      &_input_transformed, input_matrix_stride, &_input_workspace);
    const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
    TensorInfo   input_workspace_info(TensorShape(input_workspace_size), 1, DataType::U8);
    _input_workspace = input_workspace_info;

    // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
    _permute_weights->configure(weights, &_weights_hwio, weights_permutation_vector);
    transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);

    // Configure GEMM function
    _gemm_function->configure(&_input_transformed, &_kernel_storage, nullptr, &_output_transformed, 1.0f, 0.f);

    // Configure output transform function
    // The biases tensor has not been allocated at this point; the output transform adds the biases to the final result in the run() method
    if(_data_layout == DataLayout::NCHW)
    {
        // Configure and allocate a dst tensor used to convert from the Winograd domain to the spatial domain when calling reshape_output()
        TensorInfo info(TensorShape(dst->dimension(2), dst->dimension(0),
                                    dst->dimension(1), dst->dimension(3)),
                        1, dst->data_type());
        _output_nhwc  = info;
        output_to_use = &_output_nhwc;
    }
    const arm_gemm::Activation activation = arm_gemm_activation_from_acl_activation(act_info);

    transform_output_kernel->configure(biases,
                                       &_output_transformed,
                                       output_matrix_stride,
                                       output_to_use,
                                       in_shape.n_batches,
                                       output_shape.first,
                                       output_shape.second,
                                       out_channels,
                                       &_output_workspace,
                                       activation);

    const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
    TensorInfo   output_workspace_info(TensorShape(output_workspace_size), 1, DataType::U8);
    _output_workspace = output_workspace_info;

    // Reorder the convolved output back to ACL's NCHW ordering
    if(_data_layout == DataLayout::NCHW)
    {
        _permute_output->configure(&_output_nhwc, dst, PermutationVector(1U, 2U, 0U));
    }

    _transform_input_kernel   = std::move(transform_input_kernel);
    _transform_weights_kernel = std::move(transform_weights_kernel);
    _transform_output_kernel  = std::move(transform_output_kernel);

    // Configure activation layer
    _run_activation = act_info.enabled() && !fuse_function_supported(act_info);
    if(_run_activation)
    {
        _activation_func->configure(dst, nullptr, act_info);
    }

    auto asm_mem_req         = _gemm_function->workspace();
    _aux_mem[GemmWorkspace]  = asm_mem_req[GemmWorkspace];
    _aux_mem[Pretranspose]   = asm_mem_req[Pretranspose];
    _aux_mem[InterleavedLHS] = asm_mem_req[InterleavedLHS];
    _aux_mem[TransposedRHS]  = asm_mem_req[TransposedRHS];
    _aux_mem[TempResult]     = asm_mem_req[TempResult];

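    // Lifetimes: Temporary buffers only exist for the duration of a single run(), the
    // Prepare buffer (permuted weights) is released once prepare() has run, and the
    // Persistent transformed weights are kept alive across runs.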
    // Request temporary memory. Overlap memory needed for Input/Output transformations as they run on different non-overlapping time-steps.
    _aux_mem[TransformedInput]   = MemoryInfo(offset_int_vec(TransformedInput), MemoryLifetime::Temporary, input_storage_size, storage_alignment);
    _aux_mem[TransformedOutput]  = MemoryInfo(offset_int_vec(TransformedOutput), MemoryLifetime::Temporary, output_storage_size, storage_alignment);
    _aux_mem[WorkspaceIO]        = MemoryInfo(offset_int_vec(WorkspaceIO), MemoryLifetime::Temporary, std::max(input_workspace_size, output_workspace_size));
    _aux_mem[PermutedWeights]    = MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, _weights_hwio.total_size());
    _aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Persistent, kernel_storage_size, storage_alignment);
    if(_data_layout == DataLayout::NCHW)
    {
        _aux_mem[PermutedInput].merge(offset_int_vec(PermutedInput), src->total_size());
        _aux_mem[PermutedOutput].merge(offset_int_vec(PermutedOutput), dst->total_size());
    }
}

Status CpuWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
                                   const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info));

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D   input_dims  = Size2D(src->dimension(idx_width), src->dimension(idx_height));
    const Size2D   kernel_size = Size2D(weights->dimension(idx_width), weights->dimension(idx_height));
    const DataType data_type   = src->data_type();
    const Size2D   output_tile = winograd_output_tile(input_dims, kernel_size, data_type);

    // Check if the Winograd configuration requires fast math
    if(!enable_fast_math)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
                                        "This Winograd configuration requires enable_fast_math=true");
    }

    const WinogradInfo winograd_info = WinogradInfo(output_tile,
                                                    kernel_size,
                                                    input_dims,
                                                    conv_info,
                                                    src->data_layout());

    // Validate input transform
    const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info);
    const TensorInfo  input0       = src->clone()->set_tensor_shape(input0_shape);
    // Validate filter transform
    const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
    const TensorInfo  input1       = weights->clone()->set_tensor_shape(input1_shape);
    // Validate batched matrix multiply
    TensorShape batched_mm_output_shape = input0.tensor_shape();
    batched_mm_output_shape[0]          = input1.tensor_shape()[0];
    const TensorInfo batched_mm_output  = input0.clone()->set_tensor_shape(batched_mm_output_shape);

    if(kernel_size == Size2D(3, 3))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
        return validate_kernel_3x3(input_dims, src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(5, 5))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
        return validate_kernel_5x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    if(kernel_size == Size2D(3, 1))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_3x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(1, 3))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_1x3(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(5, 1))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_5x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(1, 5))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_1x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(7, 1))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 3, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 3, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_7x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else if(kernel_size == Size2D(1, 7))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 3, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 3, "Only SAME or VALID padding supported");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
        return validate_kernel_1x7(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_MSG("Kernel shape not supported");
    }
}

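// run() executes the pipeline configured above: an optional NCHW -> NHWC permute, the
// input transform, the batched GEMM, the output transform (bias plus fused activation),
// an optional permute back to NCHW, and a standalone activation when it could not be fused.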
void CpuWinogradConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);

    auto a = tensors.get_const_tensor(ACL_SRC_0);
    auto c = tensors.get_const_tensor(ACL_SRC_2);
    auto d = tensors.get_tensor(ACL_DST);

    CpuAuxTensorHandler input_nhwc(offset_int_vec(PermutedInput), _input_nhwc, tensors, true);
    CpuAuxTensorHandler input_transformed(offset_int_vec(TransformedInput), _input_transformed, tensors, true);
    CpuAuxTensorHandler input_workspace(offset_int_vec(WorkspaceIO), _input_workspace, tensors, true);

    const bool is_nchw = _data_layout == DataLayout::NCHW;
    if(is_nchw)
    {
        // Bring channels to the front, as the Winograd code expects the tensor to be in the format NHWC
        ITensorPack pack{ { ACL_SRC, a }, { ACL_DST, input_nhwc.get() } };
        _permute_input->run(pack);
    }

    // Transform input tensor to the Winograd domain
    ITensorPack transform_input_pack{ { ACL_SRC, is_nchw ? input_nhwc.get() : a }, { ACL_DST, input_transformed.get() }, { ACL_INT, input_workspace.get() } };
    NEScheduler::get().schedule_op(_transform_input_kernel.get(), Window::DimX, _transform_input_kernel->window(), transform_input_pack);

    CpuAuxTensorHandler output_transformed(offset_int_vec(TransformedOutput), _output_transformed, tensors, true);
    CpuAuxTensorHandler weights_transformed(offset_int_vec(TransformedWeights), _kernel_storage, tensors, true);

    // Run the batched GEMM in multiple threads; each kernel invocation covers one or more of the n_gemms sub-GEMMs
    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC, input_transformed.get());
    gemm_pack.add_const_tensor(ACL_SRC_1, weights_transformed.get());
    gemm_pack.add_const_tensor(ACL_BIAS, nullptr);
    gemm_pack.add_tensor(ACL_DST, output_transformed.get());
    _gemm_function->run(gemm_pack);

    // Transform output tensor to the spatial domain
    CpuAuxTensorHandler output_workspace(offset_int_vec(WorkspaceIO), _output_workspace, tensors, true);
    CpuAuxTensorHandler output_nhwc(offset_int_vec(PermutedOutput), _output_nhwc, tensors, true);
    ITensorPack         transform_output_pack{ { ACL_SRC_0, c }, { ACL_SRC_1, output_transformed.get() }, { ACL_DST, is_nchw ? output_nhwc.get() : d }, { ACL_INT, output_workspace.get() } };
    NEScheduler::get().schedule_op(_transform_output_kernel.get(), Window::DimX, _transform_output_kernel->window(), transform_output_pack);

    if(is_nchw)
    {
        // Reorder the convolved output to ACL's ordering NCHW
        ITensorPack pack{ { ACL_SRC, output_nhwc.get() }, { ACL_DST, d } };
        _permute_output->run(pack);
    }

    if(_run_activation)
    {
        ITensorPack pack{ { ACL_SRC, d }, { ACL_DST, d } };
        _activation_func->run(pack);
    }
}
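
// One-shot preparation: permute the weights from [O, I, H, W] to [H, W, I, O], transform
// them into the Winograd domain into persistent storage, then let the GEMM run its own
// weight preparation on the transformed weights.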
void CpuWinogradConv2d::prepare(ITensorPack &tensors)
{
    if(!_is_prepared)
    {
        // Permute weights
        const ITensor *weights     = tensors.get_const_tensor(ACL_SRC_1);
        ITensor       *weights_aux = utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(PermutedWeights)));
        ARM_COMPUTE_ERROR_ON_NULLPTR(weights, weights_aux);

        CpuAuxTensorHandler permuted_weights(_weights_hwio, *weights_aux);
        ITensorPack         permute_tensors{ { ACL_SRC, weights }, { ACL_DST, permuted_weights.get() } };
        _permute_weights->run(permute_tensors);

        // Transform weights
        ITensor *weights_transf = utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(TransformedWeights)));
        ARM_COMPUTE_ERROR_ON_NULLPTR(weights_transf);

        CpuAuxTensorHandler transformed_weights(_kernel_storage, *weights_transf);
        ITensorPack         transform_tensors{ { ACL_SRC, permuted_weights.get() }, { ACL_DST, transformed_weights.get() } };
        NEScheduler::get().schedule_op(_transform_weights_kernel.get(), Window::DimX, _transform_weights_kernel->window(), transform_tensors);

        ITensorPack gemm_pack = tensors;
        gemm_pack.add_const_tensor(ACL_SRC_1, transformed_weights.get());
        _gemm_function->prepare(gemm_pack);

        _is_prepared = true;
    }
}

experimental::MemoryRequirements CpuWinogradConv2d::workspace() const
{
    return _aux_mem;
}
} // namespace cpu
} // namespace arm_compute