34 #include "src/core/NEON/kernels/convolution/common/utils.hpp" 35 #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" 56 arm_gemm::Activation arm_gemm_activation_from_acl_activation(
const ActivationLayerInfo &act_info)
58 switch(act_info.activation())
75 inline Status validate_kernel_3x3(
const Size2D input_dims,
const ITensorInfo *
src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
76 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *
dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
83 if(input_dims.width > 4 && input_dims.height > 4)
96 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 105 if(act_info.enabled())
112 inline Status validate_kernel_5x5(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
113 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
118 if(act_info.enabled())
125 inline Status validate_kernel_3x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
126 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
132 if(act_info.enabled())
139 inline Status validate_kernel_1x3(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
140 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
147 if(act_info.enabled())
154 inline Status validate_kernel_5x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
155 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
161 if(act_info.enabled())
167 inline Status validate_kernel_1x5(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
168 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
174 if(act_info.enabled())
181 inline Status validate_kernel_7x1(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
182 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
188 if(act_info.enabled())
195 inline Status validate_kernel_1x7(
const ITensorInfo *src,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
196 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const WinogradInfo &winograd_info,
const ActivationLayerInfo &act_info)
203 if(act_info.enabled())
210 inline Tensor4DShape internal_get_input_shape(
const ITensorInfo *src)
216 const int in_batches = src->dimension(3);
218 return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
221 Status validate_arguments(
const ITensorInfo *src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const PadStrideInfo &
conv_info)
227 if(biases !=
nullptr)
234 Size2D winograd_output_tile(
const Size2D &input_dims,
const Size2D &kernel_dims,
DataType data_type)
236 Size2D output_tile = Size2D{};
237 if(kernel_dims == Size2D(3U, 3U))
239 output_tile = (input_dims.width <= 4 || input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
242 output_tile = Size2D(4U, 4U);
245 else if(kernel_dims == Size2D(5U, 5U))
247 output_tile = Size2D(2U, 2U);
249 else if(kernel_dims == Size2D(1U, 3U))
251 output_tile = Size2D(1U, 6U);
253 else if(kernel_dims == Size2D(3U, 1U))
255 output_tile = Size2D(6U, 1U);
257 else if(kernel_dims == Size2D(1U, 5U))
259 output_tile = Size2D(1U, 4U);
261 else if(kernel_dims == Size2D(5U, 1U))
263 output_tile = Size2D(4U, 1U);
265 else if(kernel_dims == Size2D(7U, 1U))
267 output_tile = Size2D(2U, 1U);
269 else if(kernel_dims == Size2D(1U, 7U))
271 output_tile = Size2D(1U, 2U);
276 bool check_support_fast_math(
const Size2D &output_tile,
const Size2D &kernel_size,
DataType data_type)
279 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
281 const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
283 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
286 const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
288 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
289 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
292 auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
293 std::pair<int, int>(kernel_size.width, kernel_size.height));
298 return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
300 return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
306 inline bool fuse_function_supported(
const ActivationLayerInfo &act_info)
314 : _gemm_function(
std::make_unique<
CpuGemm>()),
319 _transform_input_kernel(nullptr),
320 _transform_weights_kernel(nullptr),
321 _transform_output_kernel(nullptr),
323 _aux_mem(AuxTensorIdx::Count),
329 _input_transformed(),
330 _output_transformed(),
332 _run_activation(false),
355 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
358 if(!enable_fast_math)
361 "This Winograd configuration requires enable_fast_math=true");
364 _is_prepared =
false;
366 std::unique_ptr<ICpuWinogradConv2dTransformInputKernel> transform_input_kernel;
367 std::unique_ptr<ICpuWinogradConv2dTransformWeightsKernel> transform_weights_kernel;
368 std::unique_ptr<ICpuWinogradConv2dTransformOutputKernel> transform_output_kernel;
374 if(kernel_size ==
Size2D(3, 3))
379 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
380 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
381 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
382 n_gemms = config::WinogradBase::N_GEMMS;
383 N_BLOCK = config::WinogradConv::N_BLOCK;
388 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
389 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
390 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
391 n_gemms = config::WinogradBase::N_GEMMS;
392 N_BLOCK = config::WinogradConv::N_BLOCK;
395 else if(kernel_size ==
Size2D(5, 5))
398 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
399 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
400 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
401 n_gemms = config::WinogradBase::N_GEMMS;
402 N_BLOCK = config::WinogradConv::N_BLOCK;
404 else if(kernel_size ==
Size2D(1, 3))
407 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
408 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
409 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
410 n_gemms = config::WinogradBase::N_GEMMS;
411 N_BLOCK = config::WinogradConv::N_BLOCK;
413 else if(kernel_size ==
Size2D(3, 1))
416 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
417 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
418 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
419 n_gemms = config::WinogradBase::N_GEMMS;
420 N_BLOCK = config::WinogradConv::N_BLOCK;
422 else if(kernel_size ==
Size2D(1, 5))
425 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
426 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
427 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
428 n_gemms = config::WinogradBase::N_GEMMS;
429 N_BLOCK = config::WinogradConv::N_BLOCK;
431 else if(kernel_size ==
Size2D(5, 1))
434 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
435 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
436 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
437 n_gemms = config::WinogradBase::N_GEMMS;
438 N_BLOCK = config::WinogradConv::N_BLOCK;
440 else if(kernel_size ==
Size2D(1, 7))
443 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
444 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
445 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
446 n_gemms = config::WinogradBase::N_GEMMS;
447 N_BLOCK = config::WinogradConv::N_BLOCK;
449 else if(kernel_size ==
Size2D(7, 1))
452 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
453 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
454 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
455 n_gemms = config::WinogradBase::N_GEMMS;
456 N_BLOCK = config::WinogradConv::N_BLOCK;
463 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 466 if(kernel_size ==
Size2D(3, 3))
469 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
470 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
471 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
472 n_gemms = config::WinogradBase::N_GEMMS;
473 N_BLOCK = config::WinogradConv::N_BLOCK;
480 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 486 const PaddingType use_padding_type = (conv_info.
pad_top() != 0u || conv_info.
pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
487 const bool use_same_padding = use_padding_type == PADDING_SAME;
490 const int in_channels = src->
dimension(channel_idx);
491 const int out_channels = dst->
dimension(channel_idx);
493 const Tensor4DShape in_shape(internal_get_input_shape(src));
496 constexpr
size_t storage_alignment = 64;
499 const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels, in_channels) * data_type_size;
502 const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols, use_same_padding) * data_type_size;
505 const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
506 const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
507 const int output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
508 const auto output_shape = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
509 const int input_matrix_stride = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
514 const int m = in_shape.n_batches * tile_rows * tile_cols;
515 const int k = in_shape.n_channels;
516 const int n = out_channels;
517 const int kernel_matrix_row_stride =
roundup(out_channels, N_BLOCK);
518 const int output_matrix_row_stride = kernel_matrix_row_stride;
521 Strides a_strides(data_type_size);
522 a_strides.
set(1, a_strides[0] * k);
525 a_strides.
set(3, data_type_size * input_matrix_stride);
528 Strides b_strides(data_type_size);
529 b_strides.
set(1, data_type_size * kernel_matrix_row_stride);
530 b_strides.
set(2, data_type_size * kernel_matrix_stride);
533 Strides d_strides(data_type_size);
534 d_strides.
set(1, data_type_size * output_matrix_row_stride);
537 d_strides.
set(3, data_type_size * output_matrix_stride);
542 a_info.
init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
543 b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
544 d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
546 _input_transformed = a_info;
547 _kernel_storage = b_info;
548 _output_transformed = d_info;
559 input_to_use = &_input_nhwc;
564 transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
565 &_input_transformed, input_matrix_stride, &_input_workspace);
566 const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
568 _input_workspace = input_workspace_info;
571 _permute_weights->configure(weights, &_weights_hwio, weights_permutation_vector);
572 transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
575 _gemm_function->configure(&_input_transformed, &_kernel_storage,
nullptr, &_output_transformed, 1.0f, 0.f);
586 output_to_use = &_output_nhwc;
590 transform_output_kernel->configure(biases,
591 &_output_transformed,
592 output_matrix_stride,
601 const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
603 _output_workspace = output_workspace_info;
611 _transform_input_kernel = std::move(transform_input_kernel);
612 _transform_weights_kernel = std::move(transform_weights_kernel);
613 _transform_output_kernel = std::move(transform_output_kernel);
616 _run_activation = act_info.
enabled() && !fuse_function_supported(act_info);
619 _activation_func->configure(dst,
nullptr, act_info);
622 auto asm_mem_req = _gemm_function->workspace();
623 _aux_mem[GemmWorkspace] = asm_mem_req[GemmWorkspace];
624 _aux_mem[Pretranspose] = asm_mem_req[Pretranspose];
625 _aux_mem[InterleavedLHS] = asm_mem_req[InterleavedLHS];
626 _aux_mem[TransposedRHS] = asm_mem_req[TransposedRHS];
627 _aux_mem[TempResult] = asm_mem_req[TempResult];
630 _aux_mem[TransformedInput] =
MemoryInfo(
offset_int_vec(TransformedInput), MemoryLifetime::Temporary, input_storage_size, storage_alignment);
631 _aux_mem[TransformedOutput] =
MemoryInfo(
offset_int_vec(TransformedOutput), MemoryLifetime::Temporary, output_storage_size, storage_alignment);
632 _aux_mem[WorkspaceIO] =
MemoryInfo(
offset_int_vec(WorkspaceIO), MemoryLifetime::Temporary, std::max(input_workspace_size, output_workspace_size));
634 _aux_mem[TransformedWeights] =
MemoryInfo(
offset_int_vec(TransformedWeights), MemoryLifetime::Persistent, kernel_storage_size, storage_alignment);
656 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
659 if(!enable_fast_math)
662 "This Winograd configuration requires enable_fast_math=true");
673 const TensorInfo input0 = src->
clone()->set_tensor_shape(input0_shape);
676 const TensorInfo input1 = weights->
clone()->set_tensor_shape(input1_shape);
680 const TensorInfo batched_mm_output = input0.
clone()->set_tensor_shape(batched_mm_output_shape);
682 if(kernel_size ==
Size2D(3, 3))
691 return validate_kernel_3x3(input_dims, src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
693 else if(kernel_size ==
Size2D(5, 5))
702 return validate_kernel_5x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
704 if(kernel_size ==
Size2D(3, 1))
709 return validate_kernel_3x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
711 else if(kernel_size ==
Size2D(1, 3))
716 return validate_kernel_1x3(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
718 else if(kernel_size ==
Size2D(5, 1))
723 return validate_kernel_5x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
725 else if(kernel_size ==
Size2D(1, 5))
730 return validate_kernel_1x5(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
732 else if(kernel_size ==
Size2D(7, 1))
737 return validate_kernel_7x1(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
739 else if(kernel_size ==
Size2D(1, 7))
744 return validate_kernel_1x7(src, &input0, &input1, &batched_mm_output, weights, biases, dst, winograd_info, act_info);
769 _permute_input->run(
pack);
785 _gemm_function->run(gemm_pack);
797 _permute_output->run(
pack);
803 _activation_func->run(
pack);
818 _permute_weights->run(permute_tensors);
830 _gemm_function->prepare(gemm_pack);
T roundup(const T a, const T b)
void set(size_t dimension, T value, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Basic function to run kernels::CpuActivationKernel.
TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd input transform shape.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
bool merge(int slot, size_t new_size, size_t new_alignment=0) noexcept
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
1 channel, 1 U8 per channel
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
T iceildiv(const T a, const T b)
1 channel, 1 F32 per channel
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
Strides PermutationVector
Permutation vector.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
unsigned int pad_top() const
Get the top padding.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
CpuWinogradConv2d()
Constructor.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const override
Returns the total size of the tensor in bytes.
Basic function to run kernels::CpuPermuteKernel.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Basic function to execute GEMM.
TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd filter transform shape.
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Num samples, channels, height, width.
void init(Format format)
Initialize the tensor info with just a format.
Strides of an item in bytes.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CpuWinogradConv2d.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Kernel to perform Winograd.
#define ARM_COMPUTE_RETURN_ERROR_MSG(...)
An error is returned with the given description.
Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ )
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
Target polymorphic_cast(Source *v)
Polymorphic cast between two types.
size_t width
Width of the image region or rectangle.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
virtual unsigned int num_threads() const =0
Returns the number of threads that the SingleThreadScheduler has in its pool.
unsigned int pad_bottom() const
Get the bottom padding.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
unsigned int pad_left() const
Get the left padding.
DataLayout
[DataLayout enum definition]
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
~CpuWinogradConv2d()
Destructor.