42 int pad_decomposable(
int N)
47 bool is_decomposed =
false;
51 is_decomposed = !decomposed_vector.empty();
62 : _memory_group(memory_manager),
64 _permute_input_func(),
65 _permute_output_func(),
66 _permute_weights_func(),
70 _transform_input_func(memory_manager),
71 _transform_weights_func(),
72 _itransform_output_func(memory_manager),
75 _extract_output_func(),
77 _activation_layer_func(),
87 _transformed_weights(),
88 _input_weights_product(),
91 _itransformed_output(),
94 _original_weights(nullptr),
95 _original_bias(nullptr),
96 _is_activationlayer_enabled(false),
97 _needs_permute(false),
109 _original_weights = weights;
110 _original_bias = biases;
113 _has_bias = biases !=
nullptr;
122 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
123 pad_decomposable(input_dims.y() + kernel_size.y() - 1));
126 const ITensor *weights_to_use = weights;
127 ITensor *output_to_use = _has_bias ? &_bias_output : output;
130 if(biases !=
nullptr)
140 _memory_group.
manage(&_permuted_input);
149 input_to_use = &_permuted_input;
150 weights_to_use = &_permuted_weights;
154 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
156 _flip_weights_func.
configure(weights_to_use, &_flipped_weights, &_flip_axis);
159 const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
160 _pad_weights_func.
configure(&_flipped_weights, &_padded_weights, padding_w);
163 _transform_weights_func = std::make_unique<NEFFT2D>();
164 _transform_weights_func->configure(&_padded_weights, &_transformed_weights,
FFT2DInfo());
167 const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
168 _memory_group.
manage(&_padded_input);
169 _pad_input_func.
configure(input_to_use, &_padded_input, padding_in);
176 _memory_group.
manage(&_transformed_input);
181 _memory_group.
manage(&_output_product);
182 _prod_func.
configure(&_transformed_input, &_transformed_weights, &_output_product);
186 _memory_group.
manage(&_output_reduced);
191 _memory_group.
manage(&_itransformed_output);
194 _itransformed_output.
allocator()->
init(_output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
195 _itransform_output_func.
configure(&_output_reduced, &_itransformed_output, itranform_info);
201 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
204 const int start_left = kernel_size.x() -
conv_info.pad_left() - 1;
205 const int start_top = kernel_size.y() -
conv_info.pad_top() - 1;
206 const int end_right = _reshaped_output.
info()->
tensor_shape().
x() - (kernel_size.x() -
conv_info.pad_right() - 1) - pad_valid.x();
207 const int end_botton = _reshaped_output.
info()->
tensor_shape().
y() - (kernel_size.y() -
conv_info.pad_bottom() - 1) - pad_valid.y();
210 _memory_group.
manage(&_bias_output);
212 else if(_needs_permute)
214 output_to_use = &_permuted_output;
215 _memory_group.
manage(&_permuted_output);
222 if(biases !=
nullptr)
224 output_to_use = output;
227 output_to_use = &_permuted_output;
228 _memory_group.
manage(&_permuted_output);
247 _is_activationlayer_enabled = act_info.
enabled();
248 if(_is_activationlayer_enabled)
250 _activation_layer_func.
configure(output,
nullptr, act_info);
256 auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.
buffer());
284 if(biases !=
nullptr)
292 if((output !=
nullptr) && (output->
total_size() != 0))
316 _permute_input_func.
run();
318 _pad_input_func.
run();
319 _transform_input_func.
run();
327 _itransform_output_func.
run();
329 _extract_output_func.
run();
334 _bias_add_func.
run();
338 _permute_output_func.
run();
342 if(_is_activationlayer_enabled)
344 _activation_layer_func.
run();
353 if(_original_bias !=
nullptr)
356 _permute_bias_func.
run();
360 const ITensor *cur_weights = _original_weights;
368 _permute_weights_func.
run();
370 cur_weights = &_permuted_weights;
375 _flip_weights_func.
run();
380 _pad_weights_func.
run();
386 _transform_weights_func->run();
387 _transform_weights_func.reset();
void prepare() override
Prepare the function for executing.
FFTDirection direction
Direction of the FFT.
void remove_dimension(size_t n)
Accessor to remove the dimension n from the tensor shape.
void run() override
Run the kernels contained in the function.
void run() override final
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
bool enabled() const
Check if initialised.
std::vector< PaddingInfo > PaddingList
List of padding information.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
Descriptor used by the FFT2D function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEFFTConvolutionLayer.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2021 Arm Limited.
void configure(const ITensor *input, ITensor *output, const ITensor *axis)
Initialize the function.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
T x() const
Alias to access the size of the first dimension.
void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
NEFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
1 channel, 1 U32 per channel
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
static std::set< unsigned int > supported_radix()
Returns the radices that are supported by the FFT kernel.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
void free() override
Free allocated CPU memory.
Num samples, channels, height, width.
void run() override
Run the kernels contained in the function.
Memory group resources scope handling class.
void run() override
Run the kernels contained in the function.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value=PixelValue(), const PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
Status import_memory(void *memory)
Import an existing memory as a tensor's backing memory.
Store the tensor's metadata.
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
T y() const
Alias to access the size of the second dimension.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
Configure the permute function.
void configure(const ITensor *input, ITensor *output, const FFT2DInfo &config)
Initialise the function's source and destinations.
void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void run() override
Run the kernels contained in the function.
~NEFFTConvolutionLayer()
Default destructor.