39 #include "src/core/NEON/kernels/convolution/common/utils.hpp" 40 #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" 46 inline Status validate_kernel_3x3(
const Size2D input_dims,
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
47 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
54 if(input_dims.width > 4 && input_dims.height > 4)
67 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 76 if(act_info.enabled())
83 inline Status validate_kernel_5x5(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
84 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
89 if(act_info.enabled())
96 inline Status validate_kernel_3x1(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
97 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
103 if(act_info.enabled())
110 inline Status validate_kernel_1x3(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
111 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
118 if(act_info.enabled())
125 inline Status validate_kernel_5x1(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
126 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
132 if(act_info.enabled())
138 inline Status validate_kernel_1x5(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
139 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
145 if(act_info.enabled())
152 inline Status validate_kernel_7x1(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
153 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
159 if(act_info.enabled())
166 inline Status validate_kernel_1x7(
const ITensorInfo *
input,
const TensorInfo *input0,
const TensorInfo *input1,
const TensorInfo *batched_mm_output,
167 const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const WinogradInfo &
winograd_info,
const ActivationLayerInfo &act_info)
174 if(act_info.enabled())
187 const int in_batches =
input->info()->dimension(3);
189 return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
192 Status
validate_arguments(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const PadStrideInfo &
conv_info)
198 if(biases !=
nullptr)
206 Size2D winograd_output_tile(
const Size2D &input_dims,
const Size2D &kernel_dims,
DataType data_type)
208 Size2D output_tile = Size2D{};
209 if(kernel_dims == Size2D(3U, 3U))
211 output_tile = (input_dims.width <= 4 || input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
214 output_tile = Size2D(4U, 4U);
217 else if(kernel_dims == Size2D(5U, 5U))
219 output_tile = Size2D(2U, 2U);
221 else if(kernel_dims == Size2D(1U, 3U))
223 output_tile = Size2D(1U, 6U);
225 else if(kernel_dims == Size2D(3U, 1U))
227 output_tile = Size2D(6U, 1U);
229 else if(kernel_dims == Size2D(1U, 5U))
231 output_tile = Size2D(1U, 4U);
233 else if(kernel_dims == Size2D(5U, 1U))
235 output_tile = Size2D(4U, 1U);
237 else if(kernel_dims == Size2D(7U, 1U))
239 output_tile = Size2D(2U, 1U);
241 else if(kernel_dims == Size2D(1U, 7U))
243 output_tile = Size2D(1U, 2U);
248 bool check_support_fast_math(
const Size2D &output_tile,
const Size2D &kernel_size,
DataType data_type)
251 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
253 const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
255 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
258 const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
260 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
261 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
264 auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
265 std::pair<int, int>(kernel_size.width, kernel_size.height));
270 return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
272 return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
278 inline bool fuse_function_supported(
const ActivationLayerInfo &act_info)
283 arm_gemm::Activation arm_gemm_activation_from_acl_activation(
const ActivationLayerInfo &act_info)
285 switch(act_info.activation())
293 return arm_gemm::Activation(arm_gemm::Activation::Type::BoundedReLU, act_info.a(), act_info.b());
304 : _memory_group(memory_manager), _gemm_function(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(),
305 _permute_input(), _permute_weights(), _permute_output(), _input_transformed(), _output_transformed(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(),
306 _weights_hwio(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false), _data_layout()
311 bool enable_fast_math)
317 _data_layout =
input->info()->data_layout();
322 const Size2D input_dims =
Size2D(
input->info()->dimension(width_idx),
input->info()->dimension(height_idx));
325 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size,
data_type);
328 if(!enable_fast_math)
331 "This Winograd configuration requires enable_fast_math=true");
337 _is_prepared =
false;
342 std::unique_ptr<INEWinogradLayerTransformInputKernel> transform_input_kernel;
343 std::unique_ptr<INEWinogradLayerTransformWeightsKernel> transform_weights_kernel;
344 std::unique_ptr<INEWinogradLayerTransformOutputKernel> transform_output_kernel;
348 if(kernel_size ==
Size2D(3, 3))
350 if(
input->info()->dimension(width_idx) > 4 &&
input->info()->dimension(height_idx) > 4)
353 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
354 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
355 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
356 n_gemms = config::WinogradBase::N_GEMMS;
357 N_BLOCK = config::WinogradConv::N_BLOCK;
362 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
363 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
364 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
365 n_gemms = config::WinogradBase::N_GEMMS;
366 N_BLOCK = config::WinogradConv::N_BLOCK;
369 else if(kernel_size ==
Size2D(5, 5))
372 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
373 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
374 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
375 n_gemms = config::WinogradBase::N_GEMMS;
376 N_BLOCK = config::WinogradConv::N_BLOCK;
378 else if(kernel_size ==
Size2D(1, 3))
381 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
382 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
383 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
384 n_gemms = config::WinogradBase::N_GEMMS;
385 N_BLOCK = config::WinogradConv::N_BLOCK;
387 else if(kernel_size ==
Size2D(3, 1))
390 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
391 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
392 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
393 n_gemms = config::WinogradBase::N_GEMMS;
394 N_BLOCK = config::WinogradConv::N_BLOCK;
396 else if(kernel_size ==
Size2D(1, 5))
399 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
400 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
401 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
402 n_gemms = config::WinogradBase::N_GEMMS;
403 N_BLOCK = config::WinogradConv::N_BLOCK;
405 else if(kernel_size ==
Size2D(5, 1))
408 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
409 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
410 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
411 n_gemms = config::WinogradBase::N_GEMMS;
412 N_BLOCK = config::WinogradConv::N_BLOCK;
414 else if(kernel_size ==
Size2D(1, 7))
417 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
418 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
419 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
420 n_gemms = config::WinogradBase::N_GEMMS;
421 N_BLOCK = config::WinogradConv::N_BLOCK;
423 else if(kernel_size ==
Size2D(7, 1))
426 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
427 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
428 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
429 n_gemms = config::WinogradBase::N_GEMMS;
430 N_BLOCK = config::WinogradConv::N_BLOCK;
437 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 440 if(kernel_size ==
Size2D(3, 3))
443 transform_input_kernel = std::make_unique<config::TransformInputKernel>();
444 transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
445 transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
446 n_gemms = config::WinogradBase::N_GEMMS;
447 N_BLOCK = config::WinogradConv::N_BLOCK;
454 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 460 const PaddingType use_padding_type = (
conv_info.pad_top() != 0u ||
conv_info.pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
461 const bool use_same_padding = use_padding_type == PADDING_SAME;
464 const int in_channels =
input->info()->dimension(channel_idx);
465 const int out_channels = output->
info()->
dimension(channel_idx);
467 const Tensor4DShape in_shape(internal_get_input_shape(
input));
468 const size_t data_type_size =
input->info()->element_size();
470 constexpr
size_t storage_alignment = 64;
473 const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
478 const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
483 const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
484 const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
485 const int output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
486 const auto output_shape = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
487 const int input_matrix_stride = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
492 const int m = in_shape.n_batches * tile_rows * tile_cols;
493 const int k = in_shape.n_channels;
494 const int n = out_channels;
495 const int kernel_matrix_row_stride =
roundup(out_channels, N_BLOCK);
496 const int output_matrix_row_stride = kernel_matrix_row_stride;
499 Strides a_strides(data_type_size);
500 a_strides.
set(1, a_strides[0] * k);
503 a_strides.
set(3, data_type_size * input_matrix_stride);
506 Strides b_strides(data_type_size);
507 b_strides.
set(1, data_type_size * kernel_matrix_row_stride);
508 b_strides.
set(2, data_type_size * kernel_matrix_stride);
511 Strides d_strides(data_type_size);
512 d_strides.
set(1, data_type_size * output_matrix_row_stride);
515 d_strides.
set(3, data_type_size * output_matrix_stride);
520 a_info.
init(a_shape, 1,
data_type, a_strides, 0, input_storage_size);
521 b_info.init(b_shape, 1,
data_type, b_strides, 0, kernel_storage_size);
522 d_info.init(d_shape, 1,
data_type, d_strides, 0, output_storage_size);
524 _input_transformed.
allocator()->
init(a_info, storage_alignment);
525 _kernel_storage.
allocator()->
init(b_info, storage_alignment);
526 _output_transformed.
allocator()->
init(d_info, storage_alignment);
534 const ITensor *input_to_use = _input;
535 ITensor *output_to_use = _output;
542 _memory_group.
manage(&_input_nhwc);
544 input_to_use = &_input_nhwc;
549 _memory_group.
manage(&_input_transformed);
550 _memory_group.
manage(&_input_workspace);
551 transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
552 &_input_transformed, input_matrix_stride, &_input_workspace);
553 const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
563 _permute_weights.
configure(weights, &_weights_hwio, weights_permutation_vector);
564 transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
567 _memory_group.
manage(&_output_transformed);
568 _gemm_function.
configure(&_input_transformed, &_kernel_storage,
nullptr, &_output_transformed, 1.0f, 0.f);
575 _memory_group.
manage(&_output_nhwc);
576 output_to_use = &_output_nhwc;
580 transform_output_kernel->configure(biases,
581 &_output_transformed,
582 output_matrix_stride,
591 const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
604 _transform_input_kernel = std::move(transform_input_kernel);
605 _transform_weights_kernel = std::move(transform_weights_kernel);
606 _transform_output_kernel = std::move(transform_output_kernel);
609 _is_activationlayer_enabled = act_info.
enabled() && !fuse_function_supported(act_info);
610 if(_is_activationlayer_enabled)
612 _activationlayer_function.
configure(_output,
nullptr, act_info);
625 _permute_input.
run();
632 _gemm_function.
run();
640 _permute_output.
run();
643 if(_is_activationlayer_enabled)
645 _activationlayer_function.
run();
663 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size,
data_type);
666 if(!enable_fast_math)
669 "This Winograd configuration requires enable_fast_math=true");
676 input->data_layout());
680 const TensorInfo input0 =
input->clone()->set_tensor_shape(input0_shape);
683 const TensorInfo input1 = weights->
clone()->set_tensor_shape(input1_shape);
687 const TensorInfo batched_mm_output = input0.
clone()->set_tensor_shape(batched_mm_output_shape);
689 if(kernel_size ==
Size2D(3, 3))
698 return validate_kernel_3x3(input_dims,
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
700 else if(kernel_size ==
Size2D(5, 5))
709 return validate_kernel_5x5(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
711 if(kernel_size ==
Size2D(3, 1))
716 return validate_kernel_3x1(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
718 else if(kernel_size ==
Size2D(1, 3))
723 return validate_kernel_1x3(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
725 else if(kernel_size ==
Size2D(5, 1))
730 return validate_kernel_5x1(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
732 else if(kernel_size ==
Size2D(1, 5))
737 return validate_kernel_1x5(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
739 else if(kernel_size ==
Size2D(7, 1))
744 return validate_kernel_7x1(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
746 else if(kernel_size ==
Size2D(1, 7))
751 return validate_kernel_1x7(
input, &input0, &input1, &batched_mm_output, weights, biases, output,
winograd_info, act_info);
765 _permute_weights.
run();
T roundup(const T a, const T b)
void set(size_t dimension, T value, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd input transform shape.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
NEWinogradConvolutionLayer(const std::shared_ptr< IMemoryManager > &memory_manager=nullptr)
Constructor.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
T iceildiv(const T a, const T b)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
arm_compute::ActivationLayerInfo::ActivationFunction Activation
Constant TensorID specifying an equivalent of null tensor.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEWinogradConvolutionLayer.
void run() override
Run the kernels contained in the function.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd filter transform shape.
void free() override
Free allocated CPU memory.
void prepare() override
Prepare the function for executing.
Num samples, channels, height, width.
void init(Format format)
Initialize the tensor info with just a format.
Strides of an item in bytes.
void run() override
Run the kernels contained in the function.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
#define ARM_COMPUTE_RETURN_ERROR_MSG(...)
An error is returned with the given description.
Memory group resources scope handling class.
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
size_t width
Width of the image region or rectangle.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
Configure the permute CPP kernel.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void run() override final
Run the kernels contained in the function.
virtual unsigned int num_threads() const =0
Returns the number of threads that the SingleThreadScheduler has in its pool.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
DataLayout
[DataLayout enum definition]
Kernel to perform Winograd.
static IScheduler & get()
Access the scheduler singleton.