Compute Library 21.02
NEFFTConvolutionLayer.cpp
/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"

namespace arm_compute
{
namespace
{
int pad_decomposable(int N)
{
    const auto supported_radix = NEFFTRadixStageKernel::supported_radix();

    int  pad           = 0;
    bool is_decomposed = false;
    while(!is_decomposed)
    {
        const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
        is_decomposed                = !decomposed_vector.empty();
        if(!is_decomposed)
        {
            ++pad;
        }
    }
    return pad;
}
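// Worked example (hedged, assuming the supported radix set is {2, 3, 4, 5, 7, 8}):
// pad_decomposable(11) returns 1, since 11 is prime and cannot be decomposed into
// supported stages, while 12 = 4 * 3 can; one element of padding is therefore enough.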
} // namespace

NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager),
      _flip_weights_func(),
      _permute_input_func(),
      _permute_output_func(),
      _permute_weights_func(),
      _permute_bias_func(),
      _pad_input_func(),
      _pad_weights_func(),
      _transform_input_func(memory_manager),
      _transform_weights_func(),
      _itransform_output_func(memory_manager),
      _prod_func(),
      _reduce_func(),
      _extract_output_func(),
      _bias_add_func(),
      _activation_layer_func(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_bias(),
      _permuted_output(),
      _padded_input(),
      _padded_weights(),
      _flip_axis(),
      _flipped_weights(),
      _transformed_input(),
      _transformed_weights(),
      _input_weights_product(),
      _output_product(),
      _output_reduced(),
      _itransformed_output(),
      _reshaped_output(),
      _bias_output(),
      _original_weights(nullptr),
      _original_bias(nullptr),
      _is_activationlayer_enabled(false),
      _needs_permute(false),
      _has_bias(false),
      _is_prepared(false)
{
}

void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                      const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_UNUSED(enable_fast_math);

    _original_weights = weights;
    _original_bias    = biases;

    // Flag if bias addition is required
    _has_bias = biases != nullptr;

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D input_dims  = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
    const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
    const Size2D pad_valid   = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
                                      pad_decomposable(input_dims.y() + kernel_size.y() - 1));
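    // Note (hedged sketch of the arithmetic): a linear 2D convolution of an H x W input with
    // a Kh x Kw kernel needs an FFT of at least (W + Kw - 1) x (H + Kh - 1) points, and
    // pad_valid is the extra padding that makes each size decomposable into supported radix
    // stages. E.g., assuming the radix set {2, 3, 4, 5, 7, 8}: a 128x128 input with a 5x5
    // kernel needs 132 points per axis; 132 = 2 * 2 * 3 * 11 contains the unsupported factor
    // 11, and the first decomposable size is 135 = 3 * 3 * 3 * 5, so pad_valid = (3, 3).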
    // Tensors to use
    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = _has_bias ? &_bias_output : output;

    // Permute bias
    if(biases != nullptr)
    {
        _permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
        _permuted_bias.info()->set_data_layout(DataLayout::NCHW);
    }

    // Permute input if needed
    _needs_permute = input->info()->data_layout() == DataLayout::NHWC;
    if(_needs_permute)
    {
        _memory_group.manage(&_permuted_input);
        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
    }

    // Flip weights
    _flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
    _flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);
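    // Why the flip (hedged): pointwise multiplication in the frequency domain realises true
    // convolution, whereas CNN "convolution" is cross-correlation; reversing the kernel along
    // width and height (axes 0 and 1, written into _flip_axis at the end of configure())
    // turns one into the other.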

    // Pad weights
    const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
    _pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);

    // Transform weights
    _transform_weights_func = std::make_unique<NEFFT2D>();
    _transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());

    // Pad input
    const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
    _memory_group.manage(&_padded_input);
    _pad_input_func.configure(input_to_use, &_padded_input, padding_in);
    if(_needs_permute)
    {
        _permuted_input.allocator()->allocate();
    }

    // Transform input
    _memory_group.manage(&_transformed_input);
    _transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
    _padded_input.allocator()->allocate();

    // Perform product
    _memory_group.manage(&_output_product);
    _prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
    _transformed_input.allocator()->allocate();

    // Perform reduction
    _memory_group.manage(&_output_reduced);
    _reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
    _output_product.allocator()->allocate();
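    // Note (hedged): with the data in NCHW, ACL orders dimensions as [W, H, C, ...], so the
    // SUM reduction over axis 2 accumulates the per-input-channel products, i.e. the channel
    // summation of the convolution.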

    // Transform output
    _memory_group.manage(&_itransformed_output);
    FFT2DInfo itransform_info;
    itransform_info.direction = FFTDirection::Inverse;
    _itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
    _itransform_output_func.configure(&_output_reduced, &_itransformed_output, itransform_info);
    _output_reduced.allocator()->allocate();

    // Reshape output
    TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
    reshaped_shape.remove_dimension(2);
    _reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));

    // Extract correct region
    const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
    const int start_top  = kernel_size.y() - conv_info.pad_top() - 1;
    const int end_right  = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
    const int end_bottom = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
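    // Note (hedged sketch of the slice arithmetic): the inverse FFT yields the full linear
    // convolution, which is larger than the requested output; the slice below keeps only the
    // region matching conv_info. E.g., for a 5x5 kernel with pad_left = pad_right = 2 on a
    // 128-wide input padded to 135 points: start_left = 5 - 2 - 1 = 2 and
    // end_right = 135 - 2 - 3 = 130, so the extracted width is 130 - 2 = 128, a "same"-sized
    // output.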
    if(_has_bias)
    {
        _memory_group.manage(&_bias_output);
    }
    else if(_needs_permute)
    {
        output_to_use = &_permuted_output;
        _memory_group.manage(&_permuted_output);
    }
    _extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_bottom));
    _reshaped_output.allocator()->allocate();
    _itransformed_output.allocator()->allocate();

    // Add bias
    if(biases != nullptr)
    {
        output_to_use = output;
        if(_needs_permute)
        {
            output_to_use = &_permuted_output;
            _memory_group.manage(&_permuted_output);
        }
        auto_init_if_empty(*output_to_use->info(), *_bias_output.info());
        _bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
        _bias_output.allocator()->allocate();
    }

    // Permute output
    if(_needs_permute)
    {
        // Configure the function to transform the convolved output back to the original NHWC layout
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

        // Allocate tensors
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.configure(output, nullptr, act_info);
    }

    // Setup flip axis data
    _flip_axis.allocator()->allocate();

    auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
    axis_data[0]   = 0;
    axis_data[1]   = 1;
}
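// Minimal usage sketch (hedged; tensor shapes and names are illustrative, not taken from
// this file). The stride-1, "same"-padding constraints enforced by validate() below are
// respected: a 5x5 kernel with padding 2 on every side.
//
//     Tensor src, weights, biases, dst;
//     src.allocator()->init(TensorInfo(TensorShape(128U, 128U, 3U), 1, DataType::F32));
//     weights.allocator()->init(TensorInfo(TensorShape(5U, 5U, 3U, 16U), 1, DataType::F32));
//     biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
//     dst.allocator()->init(TensorInfo(TensorShape(128U, 128U, 16U), 1, DataType::F32));
//
//     NEFFTConvolutionLayer conv;
//     conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 2, 2));
//
//     src.allocator()->allocate();
//     weights.allocator()->allocate();
//     biases.allocator()->allocate();
//     dst.allocator()->allocate();
//     // ... fill src, weights and biases with data ...
//
//     conv.run(); // the first run() also triggers prepare()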

Status NEFFTConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                       const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_UNUSED(enable_fast_math);

    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);

    // Strides
    const auto strides = conv_info.stride();
    ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
    ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) || conv_info.pad_right() != (kernel_size.x() / 2));
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) || conv_info.pad_bottom() != (kernel_size.y() / 2));

    // Validate biases
    if(biases != nullptr)
    {
        const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
    }

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape().x() != output->tensor_shape().x()) || (input->tensor_shape().y() != output->tensor_shape().y()));

        // Validate Activation Layer
        if(act_info.enabled())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
        }
    }

    return Status{};
}
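// Hedged example: the same configuration as in the sketch above can be checked up front,
// without allocating any memory, via the static validate():
//
//     const Status s = NEFFTConvolutionLayer::validate(src.info(), weights.info(),
//                                                      biases.info(), dst.info(),
//                                                      PadStrideInfo(1, 1, 2, 2));
//     ARM_COMPUTE_ERROR_THROW_ON(s);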

void NEFFTConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Transform input
    if(_needs_permute)
    {
        _permute_input_func.run();
    }
    _pad_input_func.run();
    _transform_input_func.run();

    // Perform operations in the frequency domain
    _prod_func.run();

    _reduce_func.run();

    // Transform output
    _itransform_output_func.run();
    _reshaped_output.allocator()->import_memory(_itransformed_output.buffer());
    _extract_output_func.run();

    // Add bias
    if(_has_bias)
    {
        _bias_add_func.run();
    }
    if(_needs_permute)
    {
        _permute_output_func.run();
    }

    // Run activation layer
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.run();
    }
}

void NEFFTConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        // Permute bias to NCHW
        if(_original_bias != nullptr)
        {
            _permuted_bias.allocator()->allocate();
            _permute_bias_func.run();
            _original_bias->mark_as_unused();
        }

        const ITensor *cur_weights = _original_weights;

        // Permute weights
        if(_needs_permute)
        {
            ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());

            _permuted_weights.allocator()->allocate();
            _permute_weights_func.run();
            cur_weights->mark_as_unused();
            cur_weights = &_permuted_weights;
        }

        // Flip weights
        _flipped_weights.allocator()->allocate();
        _flip_weights_func.run();
        cur_weights->mark_as_unused();

        // Pad weights
        _padded_weights.allocator()->allocate();
        _pad_weights_func.run();
        _flipped_weights.mark_as_unused();
        _flipped_weights.allocator()->free();

        // Transform weights to frequency domain
        _transformed_weights.allocator()->allocate();
        _transform_weights_func->run();
        _transform_weights_func.reset();

        _padded_weights.mark_as_unused();
        _padded_weights.allocator()->free();

        _is_prepared = true;
    }
}
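// Note (hedged): prepare() is a one-shot. After the first call the weights have been
// permuted, flipped, padded and FFT-transformed, every intermediate weight tensor has been
// freed, and only _transformed_weights survives for subsequent run() calls.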
} // namespace arm_compute