45 int pad_decomposable(
int N)
50 bool is_decomposed =
false;
54 is_decomposed = !decomposed_vector.empty();
64 : _memory_group(memory_manager),
66 _permute_input_func(),
67 _permute_output_func(),
68 _permute_weights_func(),
72 _transform_input_func(memory_manager),
73 _transform_weights_func(),
74 _itransform_output_func(memory_manager),
77 _extract_output_func(),
79 _activation_layer_func(),
89 _transformed_weights(),
90 _input_weights_product(),
93 _itransformed_output(),
96 _original_weights(nullptr),
97 _original_bias(nullptr),
98 _is_activationlayer_enabled(false),
99 _needs_permute(false),
117 _original_weights = weights;
118 _original_bias = biases;
121 _has_bias = biases !=
nullptr;
130 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
131 pad_decomposable(input_dims.y() + kernel_size.y() - 1));
134 const ICLTensor *weights_to_use = weights;
135 ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
138 if(biases !=
nullptr)
148 _memory_group.
manage(&_permuted_input);
157 input_to_use = &_permuted_input;
158 weights_to_use = &_permuted_weights;
162 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
164 _flip_weights_func.
configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis);
167 const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
168 _pad_weights_func.
configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
171 _transform_weights_func = std::make_unique<CLFFT2D>();
172 _transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights,
FFT2DInfo());
175 const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
176 _memory_group.
manage(&_padded_input);
177 _pad_input_func.
configure(compile_context, input_to_use, &_padded_input, padding_in);
184 _memory_group.
manage(&_transformed_input);
185 _transform_input_func.
configure(compile_context, &_padded_input, &_transformed_input,
FFT2DInfo());
189 _memory_group.
manage(&_output_product);
190 _prod_func.
configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
194 _memory_group.
manage(&_output_reduced);
199 _memory_group.
manage(&_itransformed_output);
202 _itransformed_output.
allocator()->
init(_output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
203 _itransform_output_func.
configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
209 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
212 const int start_left = kernel_size.x() - conv_info.
pad_left() - 1;
213 const int start_top = kernel_size.y() - conv_info.
pad_top() - 1;
214 const int end_right = _reshaped_output.
info()->
tensor_shape().
x() - (kernel_size.x() - conv_info.
pad_right() - 1) - pad_valid.x();
215 const int end_botton = _reshaped_output.
info()->
tensor_shape().
y() - (kernel_size.y() - conv_info.
pad_bottom() - 1) - pad_valid.y();
218 _memory_group.
manage(&_bias_output);
220 else if(_needs_permute)
222 output_to_use = &_permuted_output;
223 _memory_group.
manage(&_permuted_output);
225 _extract_output_func.
configure(compile_context, &_reshaped_output, output_to_use,
Coordinates(start_left, start_top),
Coordinates(end_right, end_botton));
229 if(biases !=
nullptr)
231 output_to_use = output;
234 output_to_use = &_permuted_output;
235 _memory_group.
manage(&_permuted_output);
254 _is_activationlayer_enabled = act_info.enabled();
255 if(_is_activationlayer_enabled)
257 _activation_layer_func.
configure(compile_context, output,
nullptr, act_info);
262 _flip_axis.
map(
true);
263 auto axis_data =
reinterpret_cast<uint32_t *
>(_flip_axis.
buffer());
284 const auto strides = conv_info.
stride();
291 if(biases !=
nullptr)
298 if((output !=
nullptr) && (output->
total_size() != 0))
322 _permute_input_func.
run();
324 _pad_input_func.
run();
325 _transform_input_func.
run();
332 _itransform_output_func.
run();
334 _extract_output_func.
run();
338 _bias_add_func.
run();
342 _permute_output_func.
run();
346 if(_is_activationlayer_enabled)
348 _activation_layer_func.
run();
357 if(_original_bias !=
nullptr)
360 _permute_bias_func.
run();
364 const ICLTensor *cur_weights = _original_weights;
371 _permute_weights_func.
run();
373 cur_weights = &_permuted_weights;
378 _flip_weights_func.
run();
383 _pad_weights_func.
run();
390 _transform_weights_func->run();
394 _transform_weights_func.reset();
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
FFTDirection direction
Direction of the FFT.
void remove_dimension(size_t n)
Accessor to remove the dimension n from the tensor shape.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
Initialise the function's source, destinations and border mode.
bool enabled() const
Check if initialised.
std::vector< PaddingInfo > PaddingList
List of padding information.
static CLScheduler & get()
Access the scheduler singleton.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
void run() override
Run the kernels contained in the function.
const cl::Buffer & cl_buffer() const override
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the tensor's data.
Descriptor used by the FFT2D function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void run() override
Run the kernels contained in the function.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
unsigned int pad_top() const
Get the top padding.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
void run() override
Run the kernels contained in the function.
1 channel, 1 F16 per channel
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
void mark_as_unused() const
Marks a tensor as unused.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
T x() const
Alias to access the size of the first dimension.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
ITensorInfo & set_data_layout(const DataLayout &data_layout) override
Set the data layout of the tensor.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run() override final
Run the kernels contained in the function.
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value=PixelValue(), PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Num samples, channels, height, width.
static std::set< unsigned int > supported_radix()
Returns the radix that are support by the FFT kernel.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
Basic pool of threads to execute CPP/Neon code on several cores in parallel.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Memory group resources scope handling class.
Interface for OpenCL tensor.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
CLFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void free() override
Free allocated OpenCL memory.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
Store the tensor's metadata.
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
T y() const
Alias to access the size of the second dimension.
void run() override
Run the kernels contained in the function.
void prepare() override
Prepare the function for executing.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
unsigned int pad_bottom() const
Get the bottom padding.
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
unsigned int pad_left() const
Get the left padding.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
Initialize the function.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.