24.02.1
|
Go to the documentation of this file.
44 int pad_decomposable(
int N)
49 bool is_decomposed =
false;
50 while (!is_decomposed)
53 is_decomposed = !decomposed_vector.empty();
64 : _memory_group(memory_manager),
66 _permute_input_func(),
67 _permute_output_func(),
68 _permute_weights_func(),
72 _transform_input_func(memory_manager),
73 _transform_weights_func(),
74 _itransform_output_func(memory_manager),
77 _extract_output_func(),
79 _activation_layer_func(),
89 _transformed_weights(),
90 _input_weights_product(),
93 _itransformed_output(),
96 _original_weights(nullptr),
97 _original_bias(nullptr),
98 _is_activationlayer_enabled(false),
99 _needs_permute(false),
112 bool enable_fast_math)
117 _original_weights = weights;
118 _original_bias = biases;
121 _has_bias = biases !=
nullptr;
131 const Size2D kernel_size =
133 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.
x() + kernel_size.
x() - 1),
134 pad_decomposable(input_dims.
y() + kernel_size.
y() - 1));
137 const ITensor *weights_to_use = weights;
138 ITensor *output_to_use = _has_bias ? &_bias_output : output;
141 if (biases !=
nullptr)
151 _memory_group.
manage(&_permuted_input);
160 input_to_use = &_permuted_input;
161 weights_to_use = &_permuted_weights;
165 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
167 _flip_weights_func.
configure(weights_to_use, &_flipped_weights, &_flip_axis);
170 const PaddingList padding_w = {{0, input_dims.
x() + pad_valid.
x() - 1}, {0, input_dims.
y() + pad_valid.
y() - 1}};
171 _pad_weights_func.
configure(&_flipped_weights, &_padded_weights, padding_w);
174 _transform_weights_func = std::make_unique<NEFFT2D>();
175 _transform_weights_func->configure(&_padded_weights, &_transformed_weights,
FFT2DInfo());
178 const PaddingList padding_in = {{0, kernel_size.
x() + pad_valid.
x() - 1}, {0, kernel_size.
y() + pad_valid.
y() - 1}};
179 _memory_group.
manage(&_padded_input);
180 _pad_input_func.
configure(input_to_use, &_padded_input, padding_in);
187 _memory_group.
manage(&_transformed_input);
192 _memory_group.
manage(&_output_product);
193 _prod_func.
configure(&_transformed_input, &_transformed_weights, &_output_product);
197 _memory_group.
manage(&_output_reduced);
202 _memory_group.
manage(&_itransformed_output);
206 _output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
207 _itransform_output_func.
configure(&_output_reduced, &_itransformed_output, itranform_info);
213 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
216 const int start_left = kernel_size.
x() -
conv_info.pad_left() - 1;
217 const int start_top = kernel_size.
y() -
conv_info.pad_top() - 1;
218 const int end_right =
220 const int end_botton =
224 _memory_group.
manage(&_bias_output);
226 else if (_needs_permute)
228 output_to_use = &_permuted_output;
229 _memory_group.
manage(&_permuted_output);
231 _extract_output_func.
configure(&_reshaped_output, output_to_use,
Coordinates(start_left, start_top),
237 if (biases !=
nullptr)
239 output_to_use = output;
242 output_to_use = &_permuted_output;
243 _memory_group.
manage(&_permuted_output);
262 _is_activationlayer_enabled =
act_info.enabled();
263 if (_is_activationlayer_enabled)
271 auto axis_data =
reinterpret_cast<uint32_t *
>(_flip_axis.
buffer());
282 bool enable_fast_math)
301 conv_info.pad_right() != (kernel_size.
x() / 2));
303 conv_info.pad_bottom() != (kernel_size.
y() / 2));
306 if (biases !=
nullptr)
314 if ((output !=
nullptr) && (output->
total_size() != 0))
339 _permute_input_func.
run();
341 _pad_input_func.
run();
342 _transform_input_func.
run();
350 _itransform_output_func.
run();
352 _extract_output_func.
run();
357 _bias_add_func.
run();
361 _permute_output_func.
run();
365 if (_is_activationlayer_enabled)
367 _activation_layer_func.
run();
376 if (_original_bias !=
nullptr)
379 _permute_bias_func.
run();
383 const ITensor *cur_weights = _original_weights;
391 _permute_weights_func.
run();
393 cur_weights = &_permuted_weights;
398 _flip_weights_func.
run();
403 _pad_weights_func.
run();
409 _transform_weights_func->run();
410 _transform_weights_func.reset();
@ NCHW
Num samples, channels, height, width.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
size_t y() const
Semantic accessor for height as y.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::vector< PaddingInfo > PaddingList
List of padding information.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
void prepare() override
Prepare the function for executing.
@ NHWC
Num samples, height, width, channels.
void run() override
Run the kernels contained in the function.
Class for specifying the size of an image or rectangle.
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
Interface for CPU tensor.
void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis=false)
Initialize the function.
void configure(const ITensor *input, ITensor *output, const FFT2DInfo &config)
Initialise the function's source and destinations.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
Activation Layer Information class.
FFTDirection direction
Direction of the FFT.
Strides PermutationVector
Permutation vector.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
@ U32
unsigned 32-bit number
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
void mark_as_unused() const
Marks a tensor as unused.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
Configure the permute function.
void run() override
Run the kernels contained in the function.
T x() const
Alias to access the size of the first dimension.
void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value=PixelValue(), const PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
size_t x() const
Semantic accessor for width as x.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
~NEFFTConvolutionLayer()
Default destructor.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Descriptor used by the FFT2D function.
void run() override
Run the kernels contained in the function.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Store the tensor's metadata.
bool is_used() const
Flags if the tensor is used or not.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
Memory group resources scope handling class.
Copyright (c) 2017-2024 Arm Limited.
void remove_dimension(size_t n, bool apply_dim_correction=true)
Accessor to remove the dimension n from the tensor shape.
void run() override
Run the kernels contained in the function.
static std::set< unsigned int > supported_radix()
Returns the radix that are support by the FFT kernel.
void run() override
Run the kernels contained in the function.
void free() override
Free allocated CPU memory.
Store the tensor's metadata.
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
@ F32
32-bit floating-point number
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEFFTConvolutionLayer.
void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
NEFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
T y() const
Alias to access the size of the second dimension.
#define ARM_COMPUTE_LOG_PARAMS(...)
Status import_memory(void *memory)
Import an existing memory as a tensor's backing memory.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
void run() override final
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.