Compute Library 23.05
NEFFTConvolutionLayer.cpp
/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"

#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "src/common/utils/Log.h"
#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"

namespace arm_compute
{
namespace
{
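// Smallest extra padding that makes N decomposable into the radix stages supported by
// NEFFTRadixStageKernel (for example, if radix 3 and 4 are supported, N = 11 needs
// pad = 1 so that 12 = 4 * 3 decomposes).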
int pad_decomposable(int N)
{
    const auto supported_radix = NEFFTRadixStageKernel::supported_radix();

    int  pad           = 0;
    bool is_decomposed = false;
    while(!is_decomposed)
    {
        const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
        is_decomposed                = !decomposed_vector.empty();
        if(!is_decomposed)
        {
            ++pad;
        }
    }
    return pad;
}
} // namespace

NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager),
      _flip_weights_func(),
      _permute_input_func(),
      _permute_output_func(),
      _permute_weights_func(),
      _permute_bias_func(),
      _pad_input_func(),
      _pad_weights_func(),
      _transform_input_func(memory_manager),
      _transform_weights_func(),
      _itransform_output_func(memory_manager),
      _prod_func(),
      _reduce_func(),
      _extract_output_func(),
      _bias_add_func(),
      _activation_layer_func(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_bias(),
      _permuted_output(),
      _padded_input(),
      _padded_weights(),
      _flip_axis(),
      _flipped_weights(),
      _transformed_input(),
      _transformed_weights(),
      _input_weights_product(),
      _output_product(),
      _output_reduced(),
      _itransformed_output(),
      _reshaped_output(),
      _bias_output(),
      _original_weights(nullptr),
      _original_bias(nullptr),
      _is_activationlayer_enabled(false),
      _needs_permute(false),
      _has_bias(false),
      _is_prepared(false)
{
}

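// configure() builds the whole FFT convolution pipeline up front: permute to NCHW if
// needed, flip and pad the weights, FFT both operands, multiply elementwise in the
// frequency domain, sum over input channels, inverse-FFT, then crop the valid region,
// add the bias and run the activation.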
void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                      const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_UNUSED(enable_fast_math);
    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, act_info, enable_fast_math);

    _original_weights = weights;
    _original_bias    = biases;

    // Flag if bias addition is required
    _has_bias = biases != nullptr;

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D input_dims  = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
    const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
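    // Extra padding that rounds the linear-convolution length (input + kernel - 1) up to
    // the next size that decomposes into supported FFT radix stages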
    const Size2D pad_valid = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
                                    pad_decomposable(input_dims.y() + kernel_size.y() - 1));
    // Tensors to use
    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = _has_bias ? &_bias_output : output;

    // Permute bias
    if(biases != nullptr)
    {
        _permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
        _permuted_bias.info()->set_data_layout(DataLayout::NCHW);
    }

    // Permute input if needed
    _needs_permute = input->info()->data_layout() == DataLayout::NHWC;
    if(_needs_permute)
    {
        _memory_group.manage(&_permuted_input);
        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
    }

    // Flip weights
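    // (The frequency-domain product implements a true convolution, while the layer computes
    // a cross-correlation, so the kernel is first reversed along its width and height axes.)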
    _flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
    _flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);

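    // Both operands are zero-padded to the same 2D size, input_dims + kernel_size - 1 + pad_valid,
    // so that their spectra can be multiplied elementwise.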
    // Pad weights
    const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
    _pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);

    // Transform weights
    _transform_weights_func = std::make_unique<NEFFT2D>();
    _transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());

    // Pad input
    const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
    _memory_group.manage(&_padded_input);
    _pad_input_func.configure(input_to_use, &_padded_input, padding_in);
    if(_needs_permute)
    {
        _permuted_input.allocator()->allocate();
    }

    // Transform input
    _memory_group.manage(&_transformed_input);
    _transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
    _padded_input.allocator()->allocate();

    // Perform product
    _memory_group.manage(&_output_product);
    _prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
    _transformed_input.allocator()->allocate();

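    // Summing the per-channel spectra along axis 2 completes the convolution's accumulation
    // over input channels while still in the frequency domain.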
    // Perform reduction
    _memory_group.manage(&_output_reduced);
    _reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
    _output_product.allocator()->allocate();

    // Transform output
    _memory_group.manage(&_itransformed_output);
    FFT2DInfo itransform_info;
    itransform_info.direction = FFTDirection::Inverse;
    _itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
    _itransform_output_func.configure(&_output_reduced, &_itransformed_output, itransform_info);
    _output_reduced.allocator()->allocate();

    // Reshape output
    TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
    reshaped_shape.remove_dimension(2);
    _reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));

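    // The inverse FFT yields the full linear convolution, which is larger than the requested
    // output: the kernel_size - pad - 1 leading rows/columns and the trailing FFT padding are
    // cropped away below.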
    // Extract correct region
    const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
    const int start_top  = kernel_size.y() - conv_info.pad_top() - 1;
    const int end_right  = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
    const int end_bottom = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
    if(_has_bias)
    {
        _memory_group.manage(&_bias_output);
    }
    else if(_needs_permute)
    {
        output_to_use = &_permuted_output;
        _memory_group.manage(&_permuted_output);
    }
    _extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_bottom));
    _reshaped_output.allocator()->allocate();
    _itransformed_output.allocator()->allocate();

    // Add bias
    if(biases != nullptr)
    {
        output_to_use = output;
        if(_needs_permute)
        {
            output_to_use = &_permuted_output;
            _memory_group.manage(&_permuted_output);
        }
        auto_init_if_empty(*output_to_use->info(), *_bias_output.info());
        _bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
        _bias_output.allocator()->allocate();
    }

    // Permute output
    if(_needs_permute)
    {
        // Configure the function to transform the convolved output from NCHW back to NHWC
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

        // Allocate tensors
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.configure(output, nullptr, act_info);
    }

    // Setup flip axis data
    _flip_axis.allocator()->allocate();

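    // Reverse the kernel along dimension 0 (width) and dimension 1 (height)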
    auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
    axis_data[0]   = 0;
    axis_data[1]   = 1;
}

Status NEFFTConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                       const ActivationLayerInfo &act_info, bool enable_fast_math)
{
    ARM_COMPUTE_UNUSED(enable_fast_math);

    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);

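    // FFT-based convolution is computed at unit stride with 'same' output size, hence the
    // stride and symmetric kernel_size / 2 padding checks below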
    // Strides
    const auto strides = conv_info.stride();
    ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
    ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) || conv_info.pad_right() != (kernel_size.x() / 2));
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) || conv_info.pad_bottom() != (kernel_size.y() / 2));

    // Validate biases
    if(biases != nullptr)
    {
        const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
    }

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape().x() != output->tensor_shape().x()) || (input->tensor_shape().y() != output->tensor_shape().y()));

        // Validate Activation Layer
        if(act_info.enabled())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
        }
    }

    return Status{};
}

void NEFFTConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Transform input
    if(_needs_permute)
    {
        _permute_input_func.run();
    }
    _pad_input_func.run();
    _transform_input_func.run();

    // Perform operations in the frequency domain
    _prod_func.run();
    _reduce_func.run();

    // Transform output
    _itransform_output_func.run();
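    // _reshaped_output is a view on the inverse-transform result: it imports the same buffer
    // rather than copying, so the slice below reads straight from it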
    _reshaped_output.allocator()->import_memory(_itransformed_output.buffer());
    _extract_output_func.run();

    // Add bias
    if(_has_bias)
    {
        _bias_add_func.run();
    }
    if(_needs_permute)
    {
        _permute_output_func.run();
    }

    // Run activation layer
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.run();
    }
}

void NEFFTConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
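        // One-shot weight preparation: each intermediate tensor is freed as soon as the next
        // stage has consumed it, so only _transformed_weights outlives prepare()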
        // Permute bias to NCHW
        if(_original_bias != nullptr)
        {
            _permuted_bias.allocator()->allocate();
            _permute_bias_func.run();
            _original_bias->mark_as_unused();
        }

        const ITensor *cur_weights = _original_weights;

        // Permute weights
        if(_needs_permute)
        {
            ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());

            _permuted_weights.allocator()->allocate();
            _permute_weights_func.run();
            cur_weights->mark_as_unused();
            cur_weights = &_permuted_weights;
        }

        // Flip weights
        _flipped_weights.allocator()->allocate();
        _flip_weights_func.run();
        cur_weights->mark_as_unused();

        // Pad weights
        _padded_weights.allocator()->allocate();
        _pad_weights_func.run();
        _flipped_weights.mark_as_unused();
        _flipped_weights.allocator()->free();

        // Transform weights to frequency domain
        _transformed_weights.allocator()->allocate();
        _transform_weights_func->run();
        _transform_weights_func.reset();

        _padded_weights.mark_as_unused();
        _padded_weights.allocator()->free();

        _is_prepared = true;
    }
}
} // namespace arm_compute
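
// A minimal usage sketch (not part of the original file). Shapes and padding values are
// illustrative assumptions; the calls mirror the configure()/run() contract above
// (F32 tensors, 3x3 kernel, stride 1, symmetric kernel_size / 2 padding):
//
//     Tensor src, weights, biases, dst;
//     src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));
//     weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32));
//     biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
//     dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));
//
//     NEFFTConvolutionLayer conv;
//     conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
//
//     src.allocator()->allocate();
//     weights.allocator()->allocate();
//     biases.allocator()->allocate();
//     dst.allocator()->allocate();
//     // ... fill src/weights/biases ...
//     conv.run();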