33 #include "src/core/NEON/kernels/convolution/common/utils.hpp" 34 #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" 55 arm_gemm::Activation arm_gemm_activation_from_acl_activation(
const ActivationLayerInfo &act_info)
57 switch(act_info.activation())
74 inline Status validate_kernel_3x3(
const Size2D input_dims,
const ITensorInfo *
src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
75 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *
dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
82 if(input_dims.width > 4 && input_dims.height > 4)
95 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 104 if(act_info.enabled())
111 inline Status validate_kernel_5x5(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
112 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
117 if(act_info.enabled())
124 inline Status validate_kernel_3x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
125 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
131 if(act_info.enabled())
138 inline Status validate_kernel_1x3(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
139 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
146 if(act_info.enabled())
153 inline Status validate_kernel_5x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
154 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
160 if(act_info.enabled())
166 inline Status validate_kernel_1x5(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
167 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
173 if(act_info.enabled())
180 inline Status validate_kernel_7x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
181 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
187 if(act_info.enabled())
194 inline Status validate_kernel_1x7(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
195 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
202 if(act_info.enabled())
209 inline Tensor4DShape internal_get_input_shape(
const ITensorInfo *src)
215 const int in_batches = src->dimension(3);
217 return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
220 Status validate_arguments(
const ITensorInfo *src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const PadStrideInfo &
conv_info)
226 if(biases !=
nullptr)
233 Size2D winograd_output_tile(
const Size2D &input_dims,
const Size2D &kernel_dims,
DataType data_type)
235 Size2D output_tile = Size2D{};
236 if(kernel_dims == Size2D(3U, 3U))
238 output_tile = (input_dims.width <= 4 || input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
241 output_tile = Size2D(4U, 4U);
244 else if(kernel_dims == Size2D(5U, 5U))
246 output_tile = Size2D(2U, 2U);
248 else if(kernel_dims == Size2D(1U, 3U))
250 output_tile = Size2D(1U, 6U);
252 else if(kernel_dims == Size2D(3U, 1U))
254 output_tile = Size2D(6U, 1U);
256 else if(kernel_dims == Size2D(1U, 5U))
258 output_tile = Size2D(1U, 4U);
260 else if(kernel_dims == Size2D(5U, 1U))
262 output_tile = Size2D(4U, 1U);
264 else if(kernel_dims == Size2D(7U, 1U))
266 output_tile = Size2D(2U, 1U);
268 else if(kernel_dims == Size2D(1U, 7U))
270 output_tile = Size2D(1U, 2U);
275 bool check_support_fast_math(
const Size2D &output_tile,
const Size2D &kernel_size,
DataType data_type)
278 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
280 const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
282 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
285 const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
287 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
288 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
291 auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
292 std::pair<int, int>(kernel_size.width, kernel_size.height));
297 return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
299 return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
305 inline bool fuse_function_supported(
const ActivationLayerInfo &act_info)
313 : _gemm_function(
std::make_unique<
CpuGemm>()),
318 _transform_input_kernel(nullptr),
319 _transform_weights_kernel(nullptr),
320 _transform_output_kernel(nullptr),
322 _aux_mem(AuxTensorIdx::Count),
328 _input_transformed(),
329 _output_transformed(),
331 _run_activation(false),
353 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
356 if(!enable_fast_math)
359 "This Winograd configuration requires enable_fast_math=true");
362 _is_prepared =
false;
364 std::unique_ptr<ICpuWinogradConv2dTransformInputKernel> transform_input_kernel;
365 std::unique_ptr<ICpuWinogradConv2dTransformWeightsKernel> transform_weights_kernel;
366 std::unique_ptr<ICpuWinogradConv2dTransformOutputKernel> transform_output_kernel;
372 if(kernel_size ==
Size2D(3, 3))
377 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
378 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
379 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
380 n_gemms = config::WinogradBase::N_GEMMS;
381 N_BLOCK = config::WinogradConv::N_BLOCK;
386 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
387 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
388 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
389 n_gemms = config::WinogradBase::N_GEMMS;
390 N_BLOCK = config::WinogradConv::N_BLOCK;
393 else if(kernel_size ==
Size2D(5, 5))
396 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
397 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
398 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
399 n_gemms = config::WinogradBase::N_GEMMS;
400 N_BLOCK = config::WinogradConv::N_BLOCK;
402 else if(kernel_size ==
Size2D(1, 3))
405 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
406 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
407 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
408 n_gemms = config::WinogradBase::N_GEMMS;
409 N_BLOCK = config::WinogradConv::N_BLOCK;
411 else if(kernel_size ==
Size2D(3, 1))
414 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
415 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
416 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
417 n_gemms = config::WinogradBase::N_GEMMS;
418 N_BLOCK = config::WinogradConv::N_BLOCK;
420 else if(kernel_size ==
Size2D(1, 5))
423 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
424 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
425 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
426 n_gemms = config::WinogradBase::N_GEMMS;
427 N_BLOCK = config::WinogradConv::N_BLOCK;
429 else if(kernel_size ==
Size2D(5, 1))
432 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
433 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
434 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
435 n_gemms = config::WinogradBase::N_GEMMS;
436 N_BLOCK = config::WinogradConv::N_BLOCK;
438 else if(kernel_size ==
Size2D(1, 7))
441 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
442 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
443 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
444 n_gemms = config::WinogradBase::N_GEMMS;
445 N_BLOCK = config::WinogradConv::N_BLOCK;
447 else if(kernel_size ==
Size2D(7, 1))
450 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
451 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
452 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
453 n_gemms = config::WinogradBase::N_GEMMS;
454 N_BLOCK = config::WinogradConv::N_BLOCK;
461 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 464 if(kernel_size ==
Size2D(3, 3))
467 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
468 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
469 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
470 n_gemms = config::WinogradBase::N_GEMMS;
471 N_BLOCK = config::WinogradConv::N_BLOCK;
478 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 484 const PaddingType use_padding_type = (conv_info.
pad_top() != 0u || conv_info.
pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
485 const bool use_same_padding = use_padding_type == PADDING_SAME;
488 const int in_channels = src->
dimension(channel_idx);
489 const int out_channels = dst->
dimension(channel_idx);
491 const Tensor4DShape in_shape(internal_get_input_shape(src));
494 constexpr
size_t storage_alignment = 64;
497 const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
502 const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
507 const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
508 const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
509 const int output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
510 const auto output_shape = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
511 const int input_matrix_stride = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
516 const int m = in_shape.n_batches * tile_rows * tile_cols;
517 const int k = in_shape.n_channels;
518 const int n = out_channels;
519 const int kernel_matrix_row_stride =
roundup(out_channels, N_BLOCK);
520 const int output_matrix_row_stride = kernel_matrix_row_stride;
523 Strides a_strides(data_type_size);
524 a_strides.
set(1, a_strides[0] * k);
527 a_strides.
set(3, data_type_size * input_matrix_stride);
530 Strides b_strides(data_type_size);
531 b_strides.
set(1, data_type_size * kernel_matrix_row_stride);
532 b_strides.
set(2, data_type_size * kernel_matrix_stride);
535 Strides d_strides(data_type_size);
536 d_strides.
set(1, data_type_size * output_matrix_row_stride);
539 d_strides.
set(3, data_type_size * output_matrix_stride);
544 a_info.
init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
545 b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
546 d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
548 _input_transformed = a_info;
549 _kernel_storage = b_info;
550 _output_transformed = d_info;
562 input_to_use = &_input_nhwc;
567 transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
568 &_input_transformed, input_matrix_stride, &_input_workspace);
569 const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
571 _input_workspace = input_workspace_info;
574 _permute_weights->configure(weights, &_weights_hwio, weights_permutation_vector);
575 transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
578 _gemm_function->configure(&_input_transformed, &_kernel_storage,
nullptr, &_output_transformed, 1.0f, 0.f);
589 output_to_use = &_output_nhwc;
593 transform_output_kernel->configure(biases,
594 &_output_transformed,
595 output_matrix_stride,
604 const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
606 _output_workspace = output_workspace_info;
615 _transform_input_kernel = std::move(transform_input_kernel);
616 _transform_weights_kernel = std::move(transform_weights_kernel);
617 _transform_output_kernel = std::move(transform_output_kernel);
620 _run_activation = act_info.
enabled() && !fuse_function_supported(act_info);
623 _activation_func->configure(dst,
nullptr, act_info);
626 auto asm_mem_req = _gemm_function->workspace();
627 _aux_mem[GemmWorkspace] = asm_mem_req[GemmWorkspace];
628 _aux_mem[Pretranspose] = asm_mem_req[Pretranspose];
629 _aux_mem[InterleavedLHS] = asm_mem_req[InterleavedLHS];
630 _aux_mem[TransposedRHS] = asm_mem_req[TransposedRHS];
631 _aux_mem[TempResult] = asm_mem_req[TempResult];
633 _aux_mem[InputTransformed] =
MemoryInfo(
offset_int_vec(InputTransformed), MemoryLifetime::Temporary, input_storage_size, storage_alignment);
634 _aux_mem[InputWorkspace] =
MemoryInfo(
offset_int_vec(InputWorkspace), MemoryLifetime::Temporary, input_workspace_size);
636 _aux_mem[WeightsTransformed] =
MemoryInfo(
offset_int_vec(WeightsTransformed), MemoryLifetime::Persistent, kernel_storage_size, storage_alignment);
637 _aux_mem[OutputTransformed] =
MemoryInfo(
offset_int_vec(OutputTransformed), MemoryLifetime::Temporary, output_storage_size, storage_alignment);
638 _aux_mem[OutputWorkspace] =
MemoryInfo(
offset_int_vec(OutputWorkspace), MemoryLifetime::Temporary, output_workspace_size);
655 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
658 if(!enable_fast_math)
661 "This Winograd configuration requires enable_fast_math=true");
672 const TensorInfo input0 = src->
clone()->set_tensor_shape(input0_shape);
675 const TensorInfo input1 = weights->
clone()->set_tensor_shape(input1_shape);
679 const TensorInfo batched_mm_output = input0.
clone()->set_tensor_shape(batched_mm_output_shape);
681 if(kernel_size ==
Size2D(3, 3))
690 return validate_kernel_3x3(input_dims, src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
692 else if(kernel_size ==
Size2D(5, 5))
701 return validate_kernel_5x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
703 if(kernel_size ==
Size2D(3, 1))
708 return validate_kernel_3x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
710 else if(kernel_size ==
Size2D(1, 3))
715 return validate_kernel_1x3(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
717 else if(kernel_size ==
Size2D(5, 1))
722 return validate_kernel_5x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
724 else if(kernel_size ==
Size2D(1, 5))
729 return validate_kernel_1x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
731 else if(kernel_size ==
Size2D(7, 1))
736 return validate_kernel_7x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
738 else if(kernel_size ==
Size2D(1, 7))
743 return validate_kernel_1x7(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
769 _permute_input->run(
pack);
781 _gemm_function->run(gemm_pack);
792 _permute_output->run(
pack);
798 _activation_func->run(
pack);
813 _permute_weights->run(permute_tensors);
825 _gemm_function->prepare(gemm_pack);
T roundup(const T a, const T b)
void set(size_t dimension, T value, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Basic function to run kernels::CpuActivationKernel.
TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd input transform shape.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
1 channel, 1 U8 per channel
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
T iceildiv(const T a, const T b)
1 channel, 1 F32 per channel
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
Strides PermutationVector
Permutation vector.
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
unsigned int pad_top() const
Get the top padding.
arm_compute::ActivationLayerInfo::ActivationFunction Activation
Constant TensorID specifying an equivalent of null tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
CpuWinogradConv2d()
Constructor.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const override
Returns the total size of the tensor in bytes.
Basic function to run kernels::CpuPermuteKernel.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Basic function to execute GEMM.
TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd filter transform shape.
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Num samples, channels, height, width.
void init(Format format)
Initialize the tensor info with just a format.
Strides of an item in bytes.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CpuWinogradConv2d.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Kernel to perform Winograd.
#define ARM_COMPUTE_RETURN_ERROR_MSG(...)
An error is returned with the given description.
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
Target polymorphic_cast(Source *v)
Polymorphic cast between two types.
size_t width
Width of the image region or rectangle.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
virtual unsigned int num_threads() const =0
Returns the number of threads that the SingleThreadScheduler has in its pool.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
unsigned int pad_bottom() const
Get the bottom padding.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
unsigned int pad_left() const
Get the left padding.
DataLayout
[DataLayout enum definition]
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
~CpuWinogradConv2d()
Destructor.