49 Size2D winograd_output_tile(
const Size2D &input_dims,
const Size2D &kernel_dims,
DataLayout data_layout)
51 Size2D output_tile = Size2D{};
53 const unsigned int kernel_max_dim = std::max(kernel_dims.width, kernel_dims.height);
56 const bool is_input_lt4_nchw = (input_dims.width <= 4 && input_dims.height <= 4) && (data_layout ==
DataLayout::NCHW);
58 if(kernel_max_dim == 3U)
60 if(kernel_dims == Size2D(3U, 3U))
62 output_tile = is_input_lt4_nchw ? Size2D(2U, 2U) : Size2D(4
U, 4
U);
64 else if(kernel_dims == Size2D(3U, 1U))
66 output_tile = is_input_lt4_nchw ? Size2D(2U, 1U) : Size2D(4
U, 1
U);
70 output_tile = is_input_lt4_nchw ? Size2D(1U, 2U) : Size2D(1
U, 4
U);
73 else if(kernel_max_dim == 5U)
75 output_tile = Size2D(kernel_dims.width == 1 ? 1U : 4U,
76 kernel_dims.height == 1 ? 1U : 4U);
78 else if(kernel_max_dim == 7U)
80 output_tile = Size2D(kernel_dims.width == 1 ? 1U : 2U,
81 kernel_dims.height == 1 ? 1U : 2U);
87 bool check_support_fast_math(
const Size2D &output_tile,
const Size2D &kernel_size)
90 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
92 std::vector<WinogradConfiguration> fast_math_winograd =
94 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
95 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7))
98 auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
99 std::pair<int, int>(kernel_size.width, kernel_size.height));
101 return std::find(fast_math_winograd.begin(), fast_math_winograd.end(), p) != fast_math_winograd.end();
// NOTE(review): this block is extraction-damaged — Doxygen line numbers are embedded in
// the text, statements are split mid-token, and several original lines are missing
// (e.g. the idx_width/idx_height definitions, the fast-math gate body, the full
// WinogradInfo arguments and the input0_shape/input1_shape computations). Code is kept
// byte-identical below; comments annotate only the visible fragments. Restore from the
// complete upstream file before compiling.

// Validates that src/weights/biases/dst and the convolution parameters form a supported
// Winograd configuration; presumably returns an error Status for the first violated
// constraint — TODO confirm against the full upstream source.
104 Status validate_arguments(
const ITensorInfo *
src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *
dst,
const PadStrideInfo &
conv_info,
105 const ActivationLayerInfo &act_info,
bool enable_fast_math)
// Derive input/kernel spatial sizes; idx_width/idx_height are presumably the
// layout-dependent dimension indices (their definitions are not visible here).
112 const Size2D input_dims = Size2D(src->tensor_shape()[
idx_width], src->tensor_shape()[
idx_height]);
113 const Size2D kernel_size = Size2D(weights->tensor_shape()[
idx_width], weights->tensor_shape()[
idx_height]);
// Pick the Winograd output tile matching the input/kernel sizes and data layout.
114 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout());
// Winograd supports padding of at most half the kernel size per spatial dimension.
116 ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_left() > (kernel_size.x() / 2u)) || (conv_info.pad_right() > (kernel_size.x() / 2u))),
"Winograd only supports padding up to half kernel size");
117 ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_top() > (kernel_size.y() / 2u)) || (conv_info.pad_bottom() > (kernel_size.y() / 2u))),
"Winograd only supports padding up to half kernel size");
// When fast math is disabled, configurations requiring it are presumably rejected here
// (the body of this check is among the missing lines).
120 if(!enable_fast_math)
// Build intermediate tensor infos for the batched GEMM:
// input0 = Winograd-transformed input, input1 = Winograd-transformed weights.
126 const WinogradInfo winograd_info = WinogradInfo(output_tile,
134 const TensorInfo input0 = src->clone()->set_tensor_shape(input0_shape);
139 const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
// Batched-GEMM output takes input0's shape with dimension 0 replaced by input1's.
143 TensorShape batched_mm_output_shape = input0.tensor_shape();
144 batched_mm_output_shape[0] = input1.tensor_shape()[0];
145 const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
// Validate the batched matrix multiply; the final flag enables the fast-math
// GEMM path when the data type is F16.
146 ARM_COMPUTE_RETURN_ON_ERROR(
ClGemm::validate(&input0, &input1,
nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(
false,
false,
true , 0,
false,
false,
147 GEMMLowpOutputStageInfo(), (src->data_type() ==
DataType::F16))));
// NOTE(review): default-constructor fragment — the member initializer list is truncated
// by extraction; only the _batched_mm_output() initializer is visible. Code kept
// byte-identical.
156 ClWinogradConv2d::ClWinogradConv2d()
164 _batched_mm_output(),
// NOTE(review): configure() fragment — the signature and several statements (including
// the trailing GEMMInfo arguments) are missing from this extraction. Code kept
// byte-identical; comments annotate the visible fragments only.

// Select the Winograd output tile for the given input/kernel/layout.
183 const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->
data_layout());
// Without fast math, fail loudly on configurations that require it.
186 if(!enable_fast_math)
189 ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size),
"This Winograd configuration requires enable_fast_math=true");
// Weights still need transforming; prepare() has not run yet.
197 _is_prepared =
false;
// Configure the three Winograd stages: input transform, filter transform,
// batched GEMM, then output transform (with fused activation).
200 _input_transform->configure(compile_context, src, &_input0, winograd_info);
204 _filter_transform->configure(compile_context, weights, &_input1, winograd_info);
207 _batched_mm.
configure(compile_context, &_input0, &_input1,
nullptr, &_batched_mm_output, 1.0f, 0.0f,
GEMMInfo(
false,
false,
true , 0,
213 _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info);
// If the GEMM keeps any persistent workspace, the transformed weights only need to
// survive prepare(); otherwise they must stay persistent.
216 const MemoryLifetime wino_wei_lifetm = std::any_of(std::begin(_aux_mem),
std::end(_aux_mem), [](
const auto & r)
218 return (r.lifetime == MemoryLifetime::Persistent) && (r.size > 0);
220 MemoryLifetime::Prepare :
221 MemoryLifetime::Persistent;
// NOTE(review): run() fragment — surrounding statements are missing from this
// extraction. Code kept byte-identical.
// Infer whether the GEMM reshaped its rhs at prepare time from the lifetime of
// auxiliary buffer 3.
236 const bool is_gemm_reshaped = _aux_mem[3].lifetime == MemoryLifetime::Prepare;
// Execute the batched matrix multiply with the tensor pack assembled above
// (pack_mm is defined in lines not visible here).
262 _batched_mm.
run(pack_mm);
// NOTE(review): prepare() fragment — surrounding statements are missing from this
// extraction. Code kept byte-identical.
// Original weights are no longer needed once transformed; release them.
289 weights->mark_as_unused();
// Let the GEMM perform its one-off preparation (mm_prepare_pack is assembled in
// lines not visible here).
294 _batched_mm.
prepare(mm_prepare_pack);
Class describing the value of a pixel for any image format.
void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value=PixelValue())
Initialise the kernel's input, output and border mode.
TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd input transform shape.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
static CLScheduler & get()
Access the scheduler singleton.
~ClWinogradConv2d()
Default destructor.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Initialise the kernel's inputs and output.
1 channel, 1 F32 per channel
const DataLayout data_layout
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Activation Layer Information class.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
void remove_tensor(int id)
Remove the tensor stored with the given id.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const override
Returns the total size of the tensor in bytes.
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
GEMMLowp output stage info.
Padding and stride information class.
void end(TokenStream &in, bool &valid)
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
Calculate the winograd filter transform shape.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Num samples, channels, height, width.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Interface for OpenCL tensor.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
void prepare(ITensorPack &constants) override
Prepare the function for executing.
int offset_int_vec(int offset)
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
DataLayout
[DataLayout enum definition]
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.