/* NOTE(review): the next physical line fuses the include guard, three project
 * includes and the start of get_input_storage_size(); each piece is preceded
 * by a bare number (24, 25, 28, 29, 31, 64) that looks like a line number
 * carried over from a source listing. As written the preprocessor directives
 * do not start a line — confirm against the real header and restore the
 * line breaks.
 *
 * get_input_storage_size: pure virtual, const, returns unsigned int.
 * Takes the batch, channel, row and column counts plus a same_padding flag.
 * The name suggests it reports the storage needed for the transformed input;
 * the unit (bytes vs. elements) is not visible here — TODO confirm.
 */
24 #ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H 25 #define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H 28 #include "src/core/NEON/kernels/convolution/common/convolution.hpp" 29 #include "src/core/NEON/kernels/convolution/common/tensor.hpp" 31 #include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" 64 virtual unsigned int get_input_storage_size(
int num_batches,
int num_channels,
int num_rows,
int num_cols,
bool same_padding)
const = 0;
/** Pure virtual query returning an int matrix stride for the given problem size.
 *
 * Declared const and `= 0`, so implementers must override it.
 * NOTE(review): the leading `76` is not valid C++ in this position; it looks
 * like a line number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_batches  Batch count (meaning inferred from the name — confirm).
 * @param[in] num_channels Channel count (inferred from the name — confirm).
 * @param[in] num_rows     Row count (inferred from the name — confirm).
 * @param[in] num_cols     Column count (inferred from the name — confirm).
 * @param[in] same_padding Boolean padding selector; presumably true means SAME padding — verify.
 *
 * @return Stride as an int; what it is measured in is not visible here.
 */
76 virtual int get_matrix_stride(
int num_batches,
int num_channels,
int num_rows,
int num_cols,
bool same_padding)
const = 0;
/** Pure virtual configuration entry point; non-const, returns nothing.
 *
 * NOTE(review): the `90` before `virtual` and the `91` before
 * `const PaddingType padding` are not valid C++ here; they look like line
 * numbers carried over from a source listing — confirm and strip.
 *
 * @param[in]  input_nhwc    Read-only tensor; the name suggests NHWC layout — confirm.
 * @param[in]  num_batches   Batch count (inferred from the name).
 * @param[in]  num_rows      Row count (inferred from the name).
 * @param[in]  num_cols      Column count (inferred from the name).
 * @param[in]  num_channels  Channel count (inferred from the name).
 * @param[in]  padding       Padding mode, passed by value as a PaddingType.
 * @param[out] output        Mutable tensor; presumably receives the transformed data — verify.
 * @param[in]  matrix_stride Stride value; presumably the one reported by get_matrix_stride() — verify.
 * @param[in]  workspace     Mutable tensor; presumably scratch storage — verify.
 */
90 virtual void configure(
const ITensor *input_nhwc,
const int num_batches,
const int num_rows,
const int num_cols,
const int num_channels,
91 const PaddingType padding,
ITensor *output,
const int matrix_stride,
ITensor *workspace) = 0;
100 template <
typename T,
int OutputTileRows,
int OutputTileCols,
int KernelRows,
int KernelCols>
131 bool same_padding)
const override;
160 bool same_padding)
const override;
/** Override that returns the string literal "NEWinogradLayerTransformInputKernel".
 *
 * NOTE(review): the function body's braces are absent and bare numbers
 * (`165`, `167`) sit where they are not valid C++; this looks like damage
 * from a source-listing extraction — confirm against the real header.
 *
 * @return Pointer to a constant C string naming the kernel.
 */
165 const char *
name()
const override 167 return "NEWinogradLayerTransformInputKernel";
184 const int num_batches,
187 const int num_channels,
188 const PaddingType padding,
190 const int matrix_stride,
// NOTE(review): the numbers 197, 199, 212, 214 and 220 below are not valid C++
// in these positions; they look like listing line numbers — confirm and strip.

/** Winograd base kernel: WinogradGEMM instantiated with the output-tile and
 *  kernel dimensions and WinogradRoots::Integers. */
197 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
/** Winograd convolution kernel: nested Convolution template of WinogradBase,
 *  instantiated with T for both type arguments. */
199 using WinogradConv =
typename WinogradBase::template Convolution<T, T>;
/** Nested InputTransform template of WinogradBase, instantiated with T for
 *  both type arguments. */
212 using InputTransform =
typename WinogradBase::template InputTransform<T, T>;
/** Owning pointer to the transform object; starts out null. Where it is
 *  assigned is not visible in this excerpt. */
214 std::unique_ptr<InputTransform> _transform{
nullptr };
/** Stored padding mode. No in-class initializer is given here. */
220 PaddingType _padding;
/** Pure virtual query returning an unsigned int storage size.
 *
 * Declared const and `= 0`, so implementers must override it.
 * NOTE(review): the leading `256` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_batches         Batch count (inferred from the name — confirm).
 * @param[in] num_rows            Row count (inferred from the name — confirm).
 * @param[in] num_cols            Column count (inferred from the name — confirm).
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 *
 * @return Size as an unsigned int; the unit (bytes vs. elements) is not visible here.
 */
256 virtual unsigned int get_output_storage_size(
int num_batches,
int num_rows,
int num_cols,
int num_output_channels)
const = 0;
/** Pure virtual query returning an int matrix stride.
 *
 * Declared const and `= 0`, so implementers must override it. Takes the same
 * four integer arguments as get_output_storage_size().
 * NOTE(review): the leading `267` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_batches         Batch count (inferred from the name — confirm).
 * @param[in] num_rows            Row count (inferred from the name — confirm).
 * @param[in] num_cols            Column count (inferred from the name — confirm).
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 *
 * @return Stride as an int; what it is measured in is not visible here.
 */
267 virtual int get_matrix_stride(
int num_batches,
int num_rows,
int num_cols,
int num_output_channels)
const = 0;
277 virtual std::pair<unsigned int, unsigned int> get_output_shape(
298 const ITensor *transformed_output,
299 const int matrix_stride,
301 const int num_batches,
304 const int num_channels,
314 template <
typename T,
int OutputTileRows,
int OutputTileCols,
int KernelRows,
int KernelCols>
/** Override that returns the string literal "NEWinogradLayerTransformOutputKernel".
 *
 * NOTE(review): the function body's braces are absent and bare numbers
 * (`318`, `320`) sit where they are not valid C++; this looks like damage
 * from a source-listing extraction — confirm against the real header.
 *
 * @return Pointer to a constant C string naming the kernel.
 */
318 const char *
name()
const override 320 return "NEWinogradLayerTransformOutputKernel";
/** Override of the base-class get_output_storage_size(); declaration only,
 *  the definition is not part of this excerpt.
 *
 * NOTE(review): the leading `347` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_batches         Batch count (inferred from the name — confirm).
 * @param[in] num_rows            Row count (inferred from the name — confirm).
 * @param[in] num_cols            Column count (inferred from the name — confirm).
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 *
 * @return Size as an unsigned int; the unit is not visible here.
 */
347 unsigned int get_output_storage_size(
int num_batches,
int num_rows,
int num_cols,
int num_output_channels)
const override;
/** Override of the base-class get_matrix_stride(); declaration only, the
 *  definition is not part of this excerpt.
 *
 * NOTE(review): the leading `358` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_batches         Batch count (inferred from the name — confirm).
 * @param[in] num_rows            Row count (inferred from the name — confirm).
 * @param[in] num_cols            Column count (inferred from the name — confirm).
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 *
 * @return Stride as an int; what it is measured in is not visible here.
 */
358 int get_matrix_stride(
int num_batches,
int num_rows,
int num_cols,
int num_output_channels)
const override;
367 std::pair<unsigned int, unsigned int> get_output_shape(
370 bool padding_same)
const override;
399 const ITensor *transformed_output,
400 const int matrix_stride,
402 const int num_batches,
405 const int num_channels,
// NOTE(review): the numbers 423, 424, 425, 427, 429 and 432 below are not valid
// C++ in these positions; they look like listing line numbers — confirm and strip.

/** Winograd base kernel: WinogradGEMM instantiated with the output-tile and
 *  kernel dimensions and WinogradRoots::Integers. */
423 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
/** Winograd convolution kernel: nested Convolution template of WinogradBase,
 *  instantiated with T for both type arguments. */
424 using WinogradConv =
typename WinogradBase::template Convolution<T, T>;
/** Nested OutputTransform template of WinogradBase, instantiated with T for
 *  both type arguments. */
425 using OutputTransform =
typename WinogradBase::template OutputTransform<T, T>;
/** Owning pointer to the transform object; starts out null. Where it is
 *  assigned is not visible in this excerpt. */
427 std::unique_ptr<OutputTransform> _transform{
nullptr };
/** Non-owning pointer to a read-only tensor. No in-class initializer is
 *  given here; presumably set during configuration — verify. */
429 const ITensor *_transformed_output;
/** Stored row stride as an int. No in-class initializer is given here. */
432 int _matrix_row_stride;
/** Pure virtual query returning an unsigned int storage size for the weights.
 *
 * Declared const and `= 0`, so implementers must override it.
 * NOTE(review): the leading `467` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 * @param[in] num_input_channels  Input channel count (inferred from the name — confirm).
 *
 * @return Size as an unsigned int; the unit (bytes vs. elements) is not visible here.
 */
467 virtual unsigned int get_weight_storage_size(
int num_output_channels,
int num_input_channels)
const = 0;
/** Pure virtual query returning an int matrix stride for the weights.
 *
 * Declared const and `= 0`, so implementers must override it.
 * NOTE(review): the leading `475` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 * @param[in] num_input_channels  Input channel count (inferred from the name — confirm).
 *
 * @return Stride as an int; what it is measured in is not visible here.
 */
475 virtual int get_matrix_stride(
int num_output_channels,
int num_input_channels)
const = 0;
/** Pure virtual configuration entry point; non-const, returns nothing.
 *
 * NOTE(review): the leading `486` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in]  weights_hwio        Read-only tensor; the name suggests HWIO layout — confirm.
 * @param[out] output              Mutable tensor; presumably receives the transformed weights — verify.
 * @param[in]  matrix_stride       Stride value; presumably the one reported by get_matrix_stride() — verify.
 * @param[in]  num_output_channels Output channel count (inferred from the name).
 * @param[in]  num_input_channels  Input channel count (inferred from the name).
 */
486 virtual void configure(
const ITensor *weights_hwio,
ITensor *output,
const int matrix_stride,
const int num_output_channels,
const int num_input_channels) = 0;
499 template <
typename T,
int OutputTileRows,
int OutputTileCols,
int KernelRows,
int KernelCols>
/** Override that returns the string literal "NEWinogradLayerTransformWeightsKernel".
 *
 * NOTE(review): the function body's braces are absent and bare numbers
 * (`516`, `518`) sit where they are not valid C++; this looks like damage
 * from a source-listing extraction — confirm against the real header.
 *
 * @return Pointer to a constant C string naming the kernel.
 */
516 const char *
name()
const override 518 return "NEWinogradLayerTransformWeightsKernel";
/** Override of the base-class configure(); declaration only, the definition
 *  is not part of this excerpt.
 *
 * NOTE(review): `#ifndef DOXYGEN_SKIP_THIS` is fused onto the same line as
 * the declaration, and the bare numbers `534` and `543` are not valid C++
 * here; both look like damage from a source-listing extraction. The matching
 * `#endif` is not visible in this excerpt — confirm against the real header.
 *
 * @param[in]  weights_hwio        Read-only tensor; the name suggests HWIO layout — confirm.
 * @param[out] output              Mutable tensor; presumably receives the transformed weights — verify.
 * @param[in]  matrix_stride       Stride value; presumably the one reported by get_matrix_stride() — verify.
 * @param[in]  num_output_channels Output channel count (inferred from the name).
 * @param[in]  num_input_channels  Input channel count (inferred from the name).
 */
534 #ifndef DOXYGEN_SKIP_THIS 543 void configure(
const ITensor *weights_hwio,
ITensor *output,
const int matrix_stride,
const int num_output_channels,
const int num_input_channels)
override;
/** Override of the base-class get_weight_storage_size(); declaration only,
 *  the definition is not part of this excerpt.
 *
 * NOTE(review): the leading `554` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 * @param[in] num_input_channels  Input channel count (inferred from the name — confirm).
 *
 * @return Size as an unsigned int; the unit is not visible here.
 */
554 unsigned int get_weight_storage_size(
int num_output_channels,
int num_input_channels)
const override;
/** Override of the base-class get_matrix_stride(); declaration only, the
 *  definition is not part of this excerpt.
 *
 * NOTE(review): the leading `563` is not valid C++ here; it looks like a line
 * number carried over from a source listing — confirm and strip.
 *
 * @param[in] num_output_channels Output channel count (inferred from the name — confirm).
 * @param[in] num_input_channels  Input channel count (inferred from the name — confirm).
 *
 * @return Stride as an int; what it is measured in is not visible here.
 */
563 int get_matrix_stride(
int num_output_channels,
int num_input_channels)
const override;
// NOTE(review): the numbers 568, 569, 570, 572, 576 and 577 below are not valid
// C++ in these positions; they look like listing line numbers — confirm and strip.

/** Winograd base kernel: WinogradGEMM instantiated with the output-tile and
 *  kernel dimensions and WinogradRoots::Integers. */
568 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
/** Winograd convolution kernel: nested Convolution template of WinogradBase,
 *  instantiated with T for both type arguments. */
569 using WinogradConv =
typename WinogradBase::template Convolution<T, T>;
/** Nested WeightsTransform template of WinogradBase, instantiated with T for
 *  both type arguments. */
570 using WeightsTransform =
typename WinogradBase::template WeightsTransform<T, T>;
/** Owning pointer to the transform object; starts out null. Where it is
 *  assigned is not visible in this excerpt. */
572 std::unique_ptr<WeightsTransform> _transform{
nullptr };
/** Stored output channel count. No in-class initializer is given here. */
576 int _num_output_channels;
/** Stored input channel count. No in-class initializer is given here. */
577 int _num_input_channels;
581 template <
typename TIn,
typename TOut,
int OutputTileRows,
int OutputTileCols,
int KernelRows,
int KernelCols>
// NOTE(review): the numbers 586 and 589 below are not valid C++ in these
// positions; they look like listing line numbers — confirm and strip.

/** Winograd base kernel: WinogradGEMM instantiated with the output-tile and
 *  kernel dimensions and WinogradRoots::Integers. */
586 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
/** Winograd convolution kernel: nested Convolution template of WinogradBase.
 *  Unlike the single-type aliases elsewhere in this file, the input and
 *  output element types (TIn, TOut) are separate template arguments here. */
589 using WinogradConv =
typename WinogradBase::template Convolution<TIn, TOut>;
const Window & window() const
The maximum window the kernel can be executed on.
typename WinogradBase::template Convolution< TIn, TOut > WinogradConv
Winograd convolution kernel.
Common interface for all kernels implemented in C++.
Store the tensor's metadata.
arm_compute::ActivationLayerInfo::ActivationFunction Activation
Constant TensorID specifying an equivalent of null tensor.
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
virtual void run(const Window &window, const ThreadInfo &info)
Execute the kernel on the passed window.
virtual bool is_parallelisable() const
Indicates whether or not the kernel is parallelisable.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
winograd::WinogradGEMM< OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers > WinogradBase
Winograd base kernel.
Describe a multidimensional execution window.
Neon kernel to perform Winograd.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)