Compute Library 20.08
NEFFTConvolutionLayer.cpp
/*
 * Copyright (c) 2019 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"

#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/fft.h"
#include "support/MemorySupport.h"

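// This function implements convolution via the convolution theorem: with
// suitable zero-padding, a spatial convolution equals an element-wise product
// of the operands' 2D DFTs followed by an inverse transform. configure()
// builds exactly that pipeline: permute to NCHW if needed, flip and pad the
// weights, FFT both operands, multiply the spectra, sum over input channels,
// inverse-FFT, extract the valid region, then add the bias, permute back and
// apply the activation.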
namespace arm_compute
{
namespace
{
int pad_decomposable(int N)
{
    const auto supported_radix = NEFFTRadixStageKernel::supported_radix();

    int  pad           = 0;
    bool is_decomposed = false;
    while(!is_decomposed)
    {
        const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
        is_decomposed                = !decomposed_vector.empty();
        if(!is_decomposed)
        {
            ++pad;
        }
    }
    return pad;
}
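
// Worked example, assuming the supported radix set is {2, 3, 4, 5, 7, 8}:
// pad_decomposable(61) returns 2, because 61 is prime, 62 = 2 * 31 contains
// the unsupported factor 31, and 63 = 3 * 3 * 7 decomposes fully.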
} // namespace

NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager),
      _flip_weights_func(),
      _permute_input_func(),
      _permute_output_func(),
      _permute_weights_func(),
      _permute_bias_func(),
      _pad_input_func(),
      _pad_weights_func(),
      _transform_input_func(memory_manager),
      _transform_weights_func(),
      _itransform_output_func(memory_manager),
      _prod_func(),
      _reduce_func(),
      _extract_output_func(),
      _bias_add_func(),
      _activation_layer_func(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_bias(),
      _permuted_output(),
      _padded_input(),
      _padded_weights(),
      _flip_axis(),
      _flipped_weights(),
      _transformed_input(),
      _transformed_weights(),
      _input_weights_product(),
      _output_product(),
      _output_reduced(),
      _itransformed_output(),
      _reshaped_output(),
      _bias_output(),
      _original_weights(nullptr),
      _original_bias(nullptr),
      _is_activationlayer_enabled(false),
      _needs_permute(false),
      _has_bias(false),
      _is_prepared(false)
{
}

void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                      const ActivationLayerInfo &act_info)
{
    _original_weights = weights;
    _original_bias    = biases;

    // Flag if bias addition is required
    _has_bias = biases != nullptr;

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D input_dims  = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
    const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
    const Size2D pad_valid   = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
                                      pad_decomposable(input_dims.y() + kernel_size.y() - 1));
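    // A linear convolution of an N-point signal with a K-point kernel spans
    // N + K - 1 samples per axis; pad_valid adds whatever extra is needed so
    // that each FFT length decomposes into the supported radix stages.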
    // Tensors to use
    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = _has_bias ? &_bias_output : output;

    // Permute bias
    if(biases != nullptr)
    {
        _permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
        _permuted_bias.info()->set_data_layout(DataLayout::NCHW);
    }

    // Permute input if needed
    _needs_permute = input->info()->data_layout() == DataLayout::NHWC;
    if(_needs_permute)
    {
        _memory_group.manage(&_permuted_input);
        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
    }

    // Flip weights
    _flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
    _flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);
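    // The kernel is reversed along width and height: convolution layers
    // conventionally compute cross-correlation, whereas the frequency-domain
    // product below realises a true convolution, so flipping the weights
    // makes the two agree.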

    // Pad weights
    const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
    _pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);
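    // Both operands are zero-padded to the common FFT size
    // input + kernel + pad_valid - 1 per axis (the weights here, the input
    // below), so that their spectra can be multiplied element-wise.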

    // Transform weights
    _transform_weights_func = support::cpp14::make_unique<NEFFT2D>();
    _transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());

    // Pad input
    const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
    _memory_group.manage(&_padded_input);
    _pad_input_func.configure(input_to_use, &_padded_input, padding_in);
    if(_needs_permute)
    {
        _permuted_input.allocator()->allocate();
    }

    // Transform input
    _memory_group.manage(&_transformed_input);
    _transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
    _padded_input.allocator()->allocate();

    // Perform product
    _memory_group.manage(&_output_product);
    _prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
    _transformed_input.allocator()->allocate();

    // Perform reduction
    _memory_group.manage(&_output_reduced);
    _reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
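    // Note: axis 2 of the NCHW tensors is the input-channel dimension, so this
    // summation mirrors the accumulation over input channels that a spatial
    // convolution performs.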
    _output_product.allocator()->allocate();

    // Transform output
    _memory_group.manage(&_itransformed_output);
    FFT2DInfo itransform_info;
    itransform_info.direction = FFTDirection::Inverse;
    _itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
    _itransform_output_func.configure(&_output_reduced, &_itransformed_output, itransform_info);
    _output_reduced.allocator()->allocate();

    // Reshape output
    TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
    reshaped_shape.remove_dimension(2);
    _reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));

    // Extract correct region
    const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
    const int start_top  = kernel_size.y() - conv_info.pad_top() - 1;
    const int end_right  = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
    const int end_bottom = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
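    // The inverse transform contains the full linear convolution, which is
    // larger than the requested output: only the window implied by conv_info's
    // padding is kept, dropping the (kernel - 1 - pad) border on each side and
    // the pad_valid extension added for FFT decomposability.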
    if(_has_bias)
    {
        _memory_group.manage(&_bias_output);
    }
    else if(_needs_permute)
    {
        output_to_use = &_permuted_output;
        _memory_group.manage(&_permuted_output);
    }
    _extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_bottom));
    _reshaped_output.allocator()->allocate();
    _itransformed_output.allocator()->allocate();

    // Add bias
    if(biases != nullptr)
    {
        output_to_use = output;
        if(_needs_permute)
        {
            output_to_use = &_permuted_output;
            _memory_group.manage(&_permuted_output);
        }
        auto_init_if_empty(*output_to_use->info(), *_bias_output.info());
        _bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
        _bias_output.allocator()->allocate();
    }

    // Permute output
    if(_needs_permute)
    {
        // Configure the function to transform the convolved output from NCHW back to the original NHWC layout
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

        // Allocate tensors
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.configure(output, nullptr, act_info);
    }

    // Setup flip axis data
    _flip_axis.allocator()->allocate();

    auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
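    // Flip dimensions 0 and 1, i.e. reverse each kernel plane along width and
    // height (a 180-degree rotation of every 2D filter).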
    axis_data[0] = 0;
    axis_data[1] = 1;
}

Status NEFFTConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                       const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON((input->data_layout() == DataLayout::NHWC) && (act_info.enabled()));

    // Get indices for the width and height
    const size_t idx_width  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // Input shape, kernel size and output tile
    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);

    // Strides
    const auto strides = conv_info.stride();
    ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
    ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) || conv_info.pad_right() != (kernel_size.x() / 2));
    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) || conv_info.pad_bottom() != (kernel_size.y() / 2));
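    // Together, these checks restrict the FFT path to unit strides, square
    // kernels and symmetric "same" padding (pad == kernel_size / 2).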

    // Validate biases
    if(biases != nullptr)
    {
        const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
    }

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) || (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));

        // Validate Activation Layer
        if(act_info.enabled())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
        }
    }

    return Status{};
}

void NEFFTConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Transform input
    if(_needs_permute)
    {
        _permute_input_func.run();
    }
    _pad_input_func.run();
    _transform_input_func.run();

    // Perform operations in the frequency domain
    _prod_func.run();

    _reduce_func.run();

    // Transform output
    _itransform_output_func.run();
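    // _reshaped_output is a lower-rank view of _itransformed_output: importing
    // the buffer below aliases the same memory rather than copying it.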
    _reshaped_output.allocator()->import_memory(_itransformed_output.buffer());
    _extract_output_func.run();

    // Add bias
    if(_has_bias)
    {
        _bias_add_func.run();
    }
    if(_needs_permute)
    {
        _permute_output_func.run();
    }

    // Run activation layer
    if(_is_activationlayer_enabled)
    {
        _activation_layer_func.run();
    }
}

void NEFFTConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        // Permute bias to NCHW
        if(_original_bias != nullptr)
        {
            _permuted_bias.allocator()->allocate();
            _permute_bias_func.run();
            _original_bias->mark_as_unused();
        }

        const ITensor *cur_weights = _original_weights;

        // Permute weights
        if(_needs_permute)
        {
            ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());

            _permuted_weights.allocator()->allocate();
            _permute_weights_func.run();
            cur_weights->mark_as_unused();
            cur_weights = &_permuted_weights;
        }

        // Flip weights
        _flipped_weights.allocator()->allocate();
        _flip_weights_func.run();
        cur_weights->mark_as_unused();

        // Pad weights
        _padded_weights.allocator()->allocate();
        _pad_weights_func.run();
        _flipped_weights.mark_as_unused();
        _flipped_weights.allocator()->free();

        // Transform weights to frequency domain
        _transformed_weights.allocator()->allocate();
        _transform_weights_func->run();
        _transform_weights_func.reset();

        _padded_weights.mark_as_unused();
        _padded_weights.allocator()->free();

        _is_prepared = true;
    }
}
} // namespace arm_compute
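
For reference, a minimal caller-side sketch of using this function (the shapes are illustrative, not taken from this file; a 9x9 kernel with padding 4 and stride 1 satisfies the constraints checked in validate()):

#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // 64x64 F32 input with 8 channels and 8 filters of size 9x9 (NCHW layout).
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(9U, 9U, 8U, 8U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));

    // Stride 1 and pad 4 == 9 / 2 on every edge, as validate() requires.
    NEFFTConvolutionLayer fft_conv;
    fft_conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 4, 4));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, weights and bias with data ...

    fft_conv.run(); // prepare() is invoked internally on the first run
    return 0;
}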