43 int pad_decomposable(
int N)
48 bool is_decomposed =
false;
52 is_decomposed = !decomposed_vector.empty();
63 : _memory_group(memory_manager),
65 _permute_input_func(),
66 _permute_output_func(),
67 _permute_weights_func(),
71 _transform_input_func(memory_manager),
72 _transform_weights_func(),
73 _itransform_output_func(memory_manager),
76 _extract_output_func(),
78 _activation_layer_func(),
88 _transformed_weights(),
89 _input_weights_product(),
92 _itransformed_output(),
95 _original_weights(nullptr),
96 _original_bias(nullptr),
97 _is_activationlayer_enabled(false),
98 _needs_permute(false),
111 _original_weights = weights;
112 _original_bias = biases;
115 _has_bias = biases !=
nullptr;
124 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
125 pad_decomposable(input_dims.y() + kernel_size.y() - 1));
128 const ITensor *weights_to_use = weights;
129 ITensor *output_to_use = _has_bias ? &_bias_output : output;
132 if(biases !=
nullptr)
142 _memory_group.
manage(&_permuted_input);
151 input_to_use = &_permuted_input;
152 weights_to_use = &_permuted_weights;
156 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
158 _flip_weights_func.
configure(weights_to_use, &_flipped_weights, &_flip_axis);
161 const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
162 _pad_weights_func.
configure(&_flipped_weights, &_padded_weights, padding_w);
165 _transform_weights_func = std::make_unique<NEFFT2D>();
166 _transform_weights_func->configure(&_padded_weights, &_transformed_weights,
FFT2DInfo());
169 const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
170 _memory_group.
manage(&_padded_input);
171 _pad_input_func.
configure(input_to_use, &_padded_input, padding_in);
178 _memory_group.
manage(&_transformed_input);
183 _memory_group.
manage(&_output_product);
184 _prod_func.
configure(&_transformed_input, &_transformed_weights, &_output_product);
188 _memory_group.
manage(&_output_reduced);
193 _memory_group.
manage(&_itransformed_output);
196 _itransformed_output.
allocator()->
init(_output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
197 _itransform_output_func.
configure(&_output_reduced, &_itransformed_output, itranform_info);
203 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
206 const int start_left = kernel_size.x() - conv_info.
pad_left() - 1;
207 const int start_top = kernel_size.y() - conv_info.
pad_top() - 1;
208 const int end_right = _reshaped_output.
info()->
tensor_shape().
x() - (kernel_size.x() - conv_info.
pad_right() - 1) - pad_valid.x();
209 const int end_botton = _reshaped_output.
info()->
tensor_shape().
y() - (kernel_size.y() - conv_info.
pad_bottom() - 1) - pad_valid.y();
212 _memory_group.
manage(&_bias_output);
214 else if(_needs_permute)
216 output_to_use = &_permuted_output;
217 _memory_group.
manage(&_permuted_output);
224 if(biases !=
nullptr)
226 output_to_use = output;
229 output_to_use = &_permuted_output;
230 _memory_group.
manage(&_permuted_output);
249 _is_activationlayer_enabled = act_info.
enabled();
250 if(_is_activationlayer_enabled)
252 _activation_layer_func.
configure(output,
nullptr, act_info);
258 auto axis_data =
reinterpret_cast<uint32_t *
>(_flip_axis.
buffer());
279 const auto strides = conv_info.
stride();
286 if(biases !=
nullptr)
294 if((output !=
nullptr) && (output->
total_size() != 0))
318 _permute_input_func.
run();
320 _pad_input_func.
run();
321 _transform_input_func.
run();
329 _itransform_output_func.
run();
331 _extract_output_func.
run();
336 _bias_add_func.
run();
340 _permute_output_func.
run();
344 if(_is_activationlayer_enabled)
346 _activation_layer_func.
run();
355 if(_original_bias !=
nullptr)
358 _permute_bias_func.
run();
362 const ITensor *cur_weights = _original_weights;
370 _permute_weights_func.
run();
372 cur_weights = &_permuted_weights;
377 _flip_weights_func.
run();
382 _pad_weights_func.
run();
388 _transform_weights_func->run();
389 _transform_weights_func.reset();
void prepare() override
Prepare the function for executing.
FFTDirection direction
Direction of the FFT.
void remove_dimension(size_t n)
Accessor to remove the dimension n from the tensor shape.
void run() override
Run the kernels contained in the function.
void run() override final
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
bool enabled() const
Check if initialised.
std::vector< PaddingInfo > PaddingList
List of padding information.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
Descriptor used by the FFT2D function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEFFTConvolutionLayer.
unsigned int pad_top() const
Get the top padding.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2023 Arm Limited.
void configure(const ITensor *input, ITensor *output, const ITensor *axis)
Initialize the function.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
T x() const
Alias to access the size of the first dimension.
void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
NEFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
1 channel, 1 U32 per channel
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
static std::set< unsigned int > supported_radix()
Returns the radix that are support by the FFT kernel.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
void free() override
Free allocated CPU memory.
Num samples, channels, height, width.
void run() override
Run the kernels contained in the function.
Memory group resources scope handling class.
void run() override
Run the kernels contained in the function.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value=PixelValue(), const PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
Status import_memory(void *memory)
Import an existing memory as a tensor's backing memory.
#define ARM_COMPUTE_LOG_PARAMS(...)
Store the tensor's metadata.
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
T y() const
Alias to access the size of the second dimension.
unsigned int pad_bottom() const
Get the bottom padding.
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
Configure the permute function.
unsigned int pad_left() const
Get the left padding.
void configure(const ITensor *input, ITensor *output, const FFT2DInfo &config)
Initialise the function's source and destinations.
void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void run() override
Run the kernels contained in the function.
~NEFFTConvolutionLayer()
Default destructor.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.