Compute Library
 21.08
CpuWinogradConv2dKernel.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
25 #define ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
26 
27 #include "src/core/NEON/kernels/convolution/common/convolution.hpp"
28 #include "src/core/NEON/kernels/convolution/common/tensor.hpp"
30 
31 #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
32 
33 namespace arm_compute
34 {
35 namespace cpu
36 {
37 /** Interface for the kernel to perform Winograd input transform. */
39 {
40 public:
41  /** Get the working space required to perform the transformation.
42  *
43  * Note, the working space is only required when performing the
44  * transformation - hence it can be reused whenever the transformation is
45  * not running.
46  *
47  * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
48  * @return Size of working space required in bytes.
49  */
50  virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
51 
52  /** Determine how much memory (in units of TIn) to allocate for the
53  * transformed input.
54  *
55  * @param[in] num_batches Number of batches in the input tensor.
56  * @param[in] num_channels Number of feature maps in the input tensor.
57  * @param[in] num_rows Number of rows in each feature map.
58  * @param[in] num_cols Number of columns in each feature map.
59  * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
60  *
61  * @return Storage size (in units of TIn) required.
62  */
63  virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
64 
65  /** Gets the stride between matrices in the input workspace
66  *
67  * @param[in] num_batches Number of batches in the input tensor.
68  * @param[in] num_channels Number of feature maps in the input tensor.
69  * @param[in] num_rows Number of rows in each feature map.
70  * @param[in] num_cols Number of columns in each feature map.
71  * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
72  *
73  * @return Stride expressed in bytes.
74  */
75  virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
76 
77  /** Configure the input transform kernel.
78  *
79  * @param[in] input_nhwc Input tensor in NHWC data layout format.
80  * @param[in] num_batches Number of batches in input tensor.
81  * @param[in] num_rows Number of rows in input tensor.
82  * @param[in] num_cols Number of columns in input tensor.
83  * @param[in] num_channels Number of channels in input tensor.
84  * @param[in] padding Padding type.
85  * @param[out] output Base of output matrices.
86  * @param[in] matrix_stride Stride between output matrices.
87  * @param[in] workspace Tensor to be used as the working space during the computation.
88  */
89  virtual void configure(const ITensorInfo *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
90  const PaddingType padding, ITensorInfo *output, const int matrix_stride, ITensorInfo *workspace) = 0;
91 
92  /** Destructor */
94  {
95  }
96 };
97 
98 /** Kernel to perform Winograd input transform. */
99 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
101 {
102 public:
103  /** Prevent instances of this class from being copied (As this class contains pointers) */
105  /** Prevent instances of this class from being copied (As this class contains pointers) */
107  /** Allow instances of this class to be moved */
109  /** Allow instances of this class to be moved */
111  /** Default destructor */
113 
114  /** Determine how much memory (in units of TIn) to allocate for the
115  * transformed input.
116  *
117  * @param[in] num_batches Number of batches in the input tensor.
118  * @param[in] num_channels Number of feature maps in the input tensor.
119  * @param[in] num_rows Number of rows in each feature map.
120  * @param[in] num_cols Number of columns in each feature map.
121  * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
122  *
123  * @return Storage size (in units of TIn) required.
124  */
125  unsigned int get_input_storage_size(
126  int num_batches,
127  int num_channels,
128  int num_rows,
129  int num_cols,
130  bool same_padding) const override;
131 
132  /** Get the working space required to perform the transformation.
133  *
134  * Note, the working space is only required when performing the
135  * transformation - hence it can be reused whenever the transformation is
136  * not running.
137  *
138  * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
139  *
140  * @return Size of working space required in bytes.
141  */
142  unsigned int get_working_space_size(unsigned int num_threads) const override;
143 
144  /** Gets the stride between matrices in the input workspace
145  *
146  * @param[in] num_batches Number of batches in the input tensor.
147  * @param[in] num_channels Number of feature maps in the input tensor.
148  * @param[in] num_rows Number of rows in each feature map.
149  * @param[in] num_cols Number of columns in each feature map.
150  * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
151  *
152  * @return Stride expressed in bytes.
153  */
154  int get_matrix_stride(
155  int num_batches,
156  int num_channels,
157  int num_rows,
158  int num_cols,
159  bool same_padding) const override;
160 
161  /** Default constructor */
163 
164  const char *name() const override
165  {
166  return "CpuWinogradConv2dTransformInputKernel";
167  }
168 
169  /** Configure the input transform kernel.
170  *
171  * @param[in] input_nhwc Input tensor. Data types supported: F16/F32. Layout supported NHWC.
172  * @param[in] num_batches Number of batches in input tensor.
173  * @param[in] num_rows Number of rows in input tensor.
174  * @param[in] num_cols Number of columns in input tensor.
175  * @param[in] num_channels Number of channels in input tensor.
176  * @param[in] padding Padding type.
177  * @param[out] output Base of output matrices.
178  * @param[in] matrix_stride Stride between output matrices.
179  * @param[in] workspace Tensor to be used as the working space during the computation.
180  */
181  void configure(
182  const ITensorInfo *input_nhwc,
183  const int num_batches,
184  const int num_rows,
185  const int num_cols,
186  const int num_channels,
187  const PaddingType padding,
188  ITensorInfo *output,
189  const int matrix_stride,
190  ITensorInfo *workspace) override;
191 
192  // Inherited methods overridden:
193  void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
194 
195  /** Winograd base kernel */
196  using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
197  /** Winograd convolution kernel */
198  using WinogradConv = typename WinogradBase::template Convolution<T, T>;
199 
200  /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformInputKernel
201  *
202  * @param[in] input First tensor input info. Data types supported: F16/F32.
203  * @param[in] output Output tensor info. Data types supported: same as @p input.
204  * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
205  *
206  * @return a status
207  */
208  static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
209 
210 private:
211  using InputTransform = typename WinogradBase::template InputTransform<T, T>;
212 
213  std::unique_ptr<InputTransform> _transform{ nullptr };
214  int _num_channels; /**< Number of channels in input tensor. */
215  int _matrix_stride; /**< Stride between output matrices. */
216 };
217 
218 /** Interface for the kernel to perform Winograd output transform. */
220 {
221 public:
222  /** Get the working space required to perform the transformation.
223  *
224  * Note, the working space is only required when performing the
225  * transformation - hence it can be reused whenever the transformation is
226  * not running.
227  *
228  * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
229  *
230  * @return Size of working space required in bytes.
231  */
232  virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
233 
234  /** Determine how much memory (in units of TOut) to allocate for the
235  * (Winograd domain) output.
236  *
237  * @param[in] num_batches Number of batches in the output tensor.
238  * @param[in] num_rows Number of rows in each feature map of the input tensor.
239  * @param[in] num_cols Number of columns in each feature map of the input tensor.
240  * @param[in] num_output_channels Number of feature maps in the output tensor.
241  *
242  * @return Storage size (in units of TOut) required.
243  */
244  virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
245 
246  /** Gets the stride between matrices in the output workspace
247  *
248  * @param[in] num_batches Number of batches in the output tensor.
249  * @param[in] num_rows Number of rows in each feature map of the input tensor.
250  * @param[in] num_cols Number of columns in each feature map of the input tensor.
251  * @param[in] num_output_channels Number of feature maps in the output tensor.
252  *
253  * @return Stride expressed in bytes.
254  */
255  virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
256 
257  /** Get the output shape of a convolution.
258  *
259  * @param[in] num_rows Number of rows in each feature map of the input tensor.
260  * @param[in] num_cols Number of columns in each feature map of the input tensor.
261  * @param[in] padding_same True if padding is SAME, false otherwise
262  *
263  * @return Shape of the output tensor
264  */
265  virtual std::pair<unsigned int, unsigned int> get_output_shape(
266  int num_rows, /* Number of rows in each feature map of the input tensor. */
267  int num_cols, /* Number of columns in each feature map of the input tensor. */
268  bool padding_same /* True if padding is SAME, false otherwise */
269  ) const = 0;
270 
271  /** Configure the output transform kernel.
272  *
273  * @param[in] biases Pointer to the biases tensor.
274  * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
275  * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
276  * @param[out] output_nhwc Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain.
277  * @param[in] num_batches Number of batches in the input tensor.
278  * @param[in] num_rows Number of rows in output tensor.
279  * @param[in] num_cols Number of columns in output tensor.
280  * @param[in] num_channels Number of feature maps in the output tensor.
281  * @param[in] workspace Tensor to be used as the working space during the computation.
282  * @param[in] activation Activation to be used
283  */
284  virtual void configure(
285  const ITensorInfo *biases,
286  const ITensorInfo *transformed_output,
287  const int matrix_stride,
288  ITensorInfo *output_nhwc,
289  const int num_batches,
290  const int num_rows,
291  const int num_cols,
292  const int num_channels,
293  ITensorInfo *workspace,
294  const arm_gemm::Activation &activation) = 0;
295 
297  {
298  }
299 };
300 
301 /** Kernel to perform Winograd output transform. */
302 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
304 {
305 public:
306  const char *name() const override
307  {
308  return "CpuWinogradConv2dTransformOutputKernel";
309  }
310  /** Constructor */
312 
313  /** Prevent instances of this class from being copied (As this class contains pointers) */
315  /** Prevent instances of this class from being copied (As this class contains pointers) */
317  /** Allow instances of this class to be moved */
319  /** Allow instances of this class to be moved */
321  /** Default destructor */
323 
324  // Inherited methods overridden:
325  /** Determine how much memory (in units of TOut) to allocate for the
326  * (Winograd domain) output.
327  *
328  * @param[in] num_batches Number of batches in the output tensor.
329  * @param[in] num_rows Number of rows in each feature map of the input tensor.
330  * @param[in] num_cols Number of columns in each feature map of the input tensor.
331  * @param[in] num_output_channels Number of feature maps in the output tensor.
332  *
333  * @return Storage size (in units of TOut) required.
334  */
335  unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
336 
337  /** Gets the stride between matrices in the output workspace
338  *
339  * @param[in] num_batches Number of batches in the output tensor.
340  * @param[in] num_rows Number of rows in each feature map of the input tensor.
341  * @param[in] num_cols Number of columns in each feature map of the input tensor.
342  * @param[in] num_output_channels Number of feature maps in the output tensor.
343  *
344  * @return Stride expressed in bytes.
345  */
346  int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
347  /** Get the output shape of a convolution.
348  *
349  * @param[in] num_rows Number of rows in each feature map of the input tensor.
350  * @param[in] num_cols Number of columns in each feature map of the input tensor.
351  * @param[in] padding_same True if padding is SAME, false otherwise
352  *
353  * @return Shape of the output tensor
354  */
355  std::pair<unsigned int, unsigned int> get_output_shape(
356  int num_rows, /* Number of rows in each feature map of the input tensor. */
357  int num_cols, /* Number of columns in each feature map of the input tensor. */
358  bool padding_same) const override;
359 
360  /** Get the working space required to perform the transformation.
361  *
362  * Note, the working space is only required when performing the
363  * transformation - hence it can be reused whenever the transformation is
364  * not running.
365  *
366  * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
367  *
368  * @return Size of working space required in bytes.
369  */
370  unsigned int get_working_space_size(unsigned int num_threads) const override;
371 
372  /** Configure the output transform kernel.
373  *
374  * @param[in] biases Pointer to the biases tensor.
375  * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
376  * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
377  * @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain.
378  * @param[in] num_batches Number of batches in the input tensor.
379  * @param[in] num_rows Number of rows in output tensor.
380  * @param[in] num_cols Number of columns in output tensor.
381  * @param[in] num_channels Number of feature maps in the output tensor.
382  * @param[in] workspace Tensor to be used as the working space during the computation.
383  * @param[in] activation Activation to be used
384  */
385  void configure(
386  const ITensorInfo *biases,
387  const ITensorInfo *transformed_output,
388  const int matrix_stride,
389  ITensorInfo *output_nhwc,
390  const int num_batches,
391  const int num_rows,
392  const int num_cols,
393  const int num_channels,
394  ITensorInfo *workspace,
395  const arm_gemm::Activation &activation) override;
396 
397  void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
398 
399  /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformOutputKernel
400  *
401  * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
402  * @param[in] bias Biases tensor info. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: same as @p input
403  * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
404  * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
405  *
406  * @return a status
407  */
408  static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
409 
410 private:
411  using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
412  using WinogradConv = typename WinogradBase::template Convolution<T, T>;
413  using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;
414 
415  std::unique_ptr<OutputTransform> _transform{ nullptr };
416  int _matrix_stride;
417  int _matrix_row_stride;
418 };
419 
420 /** Interface for the kernel to perform Winograd weights transform. */
422 {
423 public:
424  /** Prevent instances of this class from being copied (As this class contains pointers) */
426  /** Prevent instances of this class from being copied (As this class contains pointers) */
428  /** Allow instances of this class to be moved */
430  /** Allow instances of this class to be moved */
432 
434  {
435  }
437  {
438  }
439  /** Determine how much memory (in units of T) to allocate for the
440  * transformed weights.
441  *
442  * @param[in] num_output_channels Number of output feature maps.
443  * @param[in] num_input_channels Number of input feature maps.
444  *
445  * @return Storage size (in units of T) required.
446  */
447  virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
448  /** Gets the stride between matrices in the kernel workspace
449  *
450  * @param[in] num_output_channels Number of output feature maps.
451  * @param[in] num_input_channels Number of input feature maps.
452  *
453  * @return Stride expressed in bytes.
454  */
455  virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;
456 
457  /** Configure the weights transform kernel.
458  *
459  * @param[in] weights_hwio Pointer to the weights tensor info
460  * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
461  * @param[in] matrix_stride Stride across matrices in the output workspace.
462  * @param[in] num_output_channels Number of filters.
463  * @param[in] num_input_channels Number of channels in each filter.
464  */
465 
466  virtual void configure(const ITensorInfo *weights_hwio, ITensorInfo *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;
467 
468  /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
469  *
470  * @param[in] input First tensor input info. Data types supported: F16/F32.
471  * @param[in] weights Weights tensor info. Data types supported: same as @p input.
472  *
473  * @return a status
474  */
475  static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
476 };
477 
478 /** Kernel to perform Winograd weights transform. */
479 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
481 {
482 public:
483  /** Prevent instances of this class from being copied (As this class contains pointers) */
485  /** Prevent instances of this class from being copied (As this class contains pointers) */
487  /** Allow instances of this class to be moved */
489  /** Allow instances of this class to be moved */
491  /** Default destructor */
493 
494  /** Default constructor. */
496  const char *name() const override
497  {
498  return "CpuWinogradConv2dTransformWeightsKernel";
499  }
500 
501  /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
502  *
503  * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
504  * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
505  * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
506  * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
507  *
508  * @return a status
509  */
510  static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
511 
512  // Inherited methods overridden:
513 
514 #ifndef DOXYGEN_SKIP_THIS
515  /** Configure the weights transform kernel.
516  *
517  * @param[in] weights_hwio Pointer to the weights tensor info
518  * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
519  * @param[in] matrix_stride Stride across matrices in the output workspace.
520  * @param[in] num_output_channels Number of filters.
521  * @param[in] num_input_channels Number of channels in each filter.
522  */
523  void configure(const ITensorInfo *weights_hwio, ITensorInfo *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
524 #endif /* DOXYGEN_SKIP_THIS */
525 
526  /** Determine how much memory (in units of T) to allocate for the
527  * transformed weights.
528  *
529  * @param[in] num_output_channels Number of output feature maps.
530  * @param[in] num_input_channels Number of input feature maps.
531  *
532  * @return Storage size (in units of T) required.
533  */
534  unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
535 
536  /** Gets the stride between matrices in the kernel workspace
537  *
538  * @param[in] num_output_channels Number of output feature maps.
539  * @param[in] num_input_channels Number of input feature maps.
540  *
541  * @return Stride expressed in bytes.
542  */
543  int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
544  void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
545  bool is_parallelisable() const override;
546 
547 private:
548  using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
549  using WinogradConv = typename WinogradBase::template Convolution<T, T>;
550  using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;
551 
552  std::unique_ptr<WeightsTransform> _transform{ nullptr };
553  int _num_output_channels;
554  int _matrix_stride;
555 };
556 
557 /** Kernel to perform Winograd. */
558 template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
560 {
561 public:
562  /** Winograd base kernel */
563  using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
564  /** Winograd convolution kernel */
565 
566  using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
567 
571 };
572 
573 } // namespace cpu
574 } // namespace arm_compute
575 #endif /*ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H*/
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
winograd::WinogradGEMM< OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers > WinogradBase
Winograd base kernel.
Common interface for all kernels implemented in C++.
Definition: ICPPKernel.h:38
virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const =0
Determine how much memory (in units of TIn) to allocate for the transformed input.
Winograd information.
Definition: Types.h:2142
Store the tensor's metadata.
Definition: ITensorInfo.h:40
virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
Execute the kernel on the passed window.
Definition: ICPPKernel.h:86
Status class.
Definition: Error.h:52
const char * name() const override
Name of the kernel.
const char * name() const override
Name of the kernel.
virtual void configure(const ITensorInfo *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels, const PaddingType padding, ITensorInfo *output, const int matrix_stride, ITensorInfo *workspace)=0
Configure the input transform kernel.
Copyright (c) 2017-2021 Arm Limited.
typename WinogradBase::template Convolution< T, T > WinogradConv
Winograd convolution kernel.
Kernel to perform Winograd weights transform.
virtual bool is_parallelisable() const
Indicates whether or not the kernel is parallelisable.
Definition: IKernel.cpp:41
typename WinogradBase::template Convolution< TIn, TOut > WinogradConv
Winograd convolution kernel.
virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const =0
Gets the stride between matrices in the input workspace.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for the kernel to perform Winograd input transform.
Information about executing thread and CPU.
Definition: CPPTypes.h:158
Kernel to perform Winograd input transform.
virtual unsigned int get_working_space_size(unsigned int num_threads) const =0
Get the working space required to perform the transformation.
Kernel to perform Winograd output transform.
const char * name() const override
Name of the kernel.
Interface for the kernel to perform Winograd output transform.
Tensor packing service.
Definition: ITensorPack.h:39
winograd::WinogradGEMM< OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers > WinogradBase
Winograd base kernel.
Interface for the kernel to perform Winograd weights transform.
Describe a multidimensional execution window.
Definition: Window.h:39
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)