47 int pad_decomposable(
int N)
52 bool is_decomposed =
false;
56 is_decomposed = !decomposed_vector.empty();
66 : _memory_group(memory_manager),
68 _permute_input_func(),
69 _permute_output_func(),
70 _permute_weights_func(),
74 _transform_input_func(memory_manager),
75 _transform_weights_func(),
76 _itransform_output_func(memory_manager),
79 _extract_output_func(),
81 _activation_layer_func(),
91 _transformed_weights(),
92 _input_weights_product(),
95 _itransformed_output(),
98 _original_weights(nullptr),
99 _original_bias(nullptr),
100 _is_activationlayer_enabled(false),
101 _needs_permute(false),
120 _original_weights = weights;
121 _original_bias = biases;
124 _has_bias = biases !=
nullptr;
133 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
134 pad_decomposable(input_dims.y() + kernel_size.y() - 1));
137 const ICLTensor *weights_to_use = weights;
138 ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
141 if(biases !=
nullptr)
151 _memory_group.
manage(&_permuted_input);
160 input_to_use = &_permuted_input;
161 weights_to_use = &_permuted_weights;
165 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
167 _flip_weights_func.
configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis);
170 const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
171 _pad_weights_func.
configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
174 _transform_weights_func = std::make_unique<CLFFT2D>();
175 _transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights,
FFT2DInfo());
178 const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
179 _memory_group.
manage(&_padded_input);
180 _pad_input_func.
configure(compile_context, input_to_use, &_padded_input, padding_in);
187 _memory_group.
manage(&_transformed_input);
188 _transform_input_func.
configure(compile_context, &_padded_input, &_transformed_input,
FFT2DInfo());
192 _memory_group.
manage(&_output_product);
193 _prod_func.
configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
197 _memory_group.
manage(&_output_reduced);
202 _memory_group.
manage(&_itransformed_output);
205 _itransformed_output.
allocator()->
init(_output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
206 _itransform_output_func.
configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
212 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
215 const int start_left = kernel_size.x() - conv_info.
pad_left() - 1;
216 const int start_top = kernel_size.y() - conv_info.
pad_top() - 1;
217 const int end_right = _reshaped_output.
info()->
tensor_shape().
x() - (kernel_size.x() - conv_info.
pad_right() - 1) - pad_valid.x();
218 const int end_botton = _reshaped_output.
info()->
tensor_shape().
y() - (kernel_size.y() - conv_info.
pad_bottom() - 1) - pad_valid.y();
221 _memory_group.
manage(&_bias_output);
223 else if(_needs_permute)
225 output_to_use = &_permuted_output;
226 _memory_group.
manage(&_permuted_output);
228 _extract_output_func.
configure(compile_context, &_reshaped_output, output_to_use,
Coordinates(start_left, start_top),
Coordinates(end_right, end_botton));
232 if(biases !=
nullptr)
234 output_to_use = output;
237 output_to_use = &_permuted_output;
238 _memory_group.
manage(&_permuted_output);
257 _is_activationlayer_enabled = act_info.enabled();
258 if(_is_activationlayer_enabled)
260 _activation_layer_func.
configure(compile_context, output,
nullptr, act_info);
265 _flip_axis.
map(
true);
266 auto axis_data =
reinterpret_cast<uint32_t *
>(_flip_axis.
buffer());
287 const auto strides = conv_info.
stride();
294 if(biases !=
nullptr)
301 if((output !=
nullptr) && (output->
total_size() != 0))
325 _permute_input_func.
run();
327 _pad_input_func.
run();
328 _transform_input_func.
run();
335 _itransform_output_func.
run();
337 _extract_output_func.
run();
341 _bias_add_func.
run();
345 _permute_output_func.
run();
349 if(_is_activationlayer_enabled)
351 _activation_layer_func.
run();
360 if(_original_bias !=
nullptr)
363 _permute_bias_func.
run();
367 const ICLTensor *cur_weights = _original_weights;
374 _permute_weights_func.
run();
376 cur_weights = &_permuted_weights;
381 _flip_weights_func.
run();
386 _pad_weights_func.
run();
393 _transform_weights_func->run();
397 _transform_weights_func.reset();
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
FFTDirection direction
Direction of the FFT.
void remove_dimension(size_t n)
Accessor to remove the dimension n from the tensor shape.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
Initialise the function's source, destinations and border mode.
bool enabled() const
Check if initialised.
std::vector< PaddingInfo > PaddingList
List of padding information.
static CLScheduler & get()
Access the scheduler singleton.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
void run() override
Run the kernels contained in the function.
const cl::Buffer & cl_buffer() const override
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
Descriptor used by the FFT2D function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void run() override
Run the kernels contained in the function.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
unsigned int pad_top() const
Get the top padding.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
void run() override
Run the kernels contained in the function.
1 channel, 1 F16 per channel
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
void mark_as_unused() const
Marks a tensor as unused.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
T x() const
Alias to access the size of the first dimension.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
ITensorInfo & set_data_layout(const DataLayout &data_layout) override
Set the data layout of the tensor.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run() override final
Run the kernels contained in the function.
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value=PixelValue(), PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Num samples, channels, height, width.
static std::set< unsigned int > supported_radix()
Returns the radix that are support by the FFT kernel.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
Basic pool of threads to execute CPP/Neon code on several cores in parallel.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Memory group resources scope handling class.
Interface for OpenCL tensor.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
CLFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void free() override
Free allocated OpenCL memory.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
#define ARM_COMPUTE_LOG_PARAMS(...)
Store the tensor's metadata.
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
T y() const
Alias to access the size of the second dimension.
void run() override
Run the kernels contained in the function.
void prepare() override
Prepare the function for executing.
unsigned int pad_bottom() const
Get the bottom padding.
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
unsigned int pad_left() const
Get the left padding.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
Initialize the function.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.