24.02
|
Go to the documentation of this file.
47 int pad_decomposable(
int N)
52 bool is_decomposed =
false;
53 while (!is_decomposed)
56 is_decomposed = !decomposed_vector.empty();
66 : _memory_group(memory_manager),
68 _permute_input_func(),
69 _permute_output_func(),
70 _permute_weights_func(),
74 _transform_input_func(memory_manager),
75 _transform_weights_func(),
76 _itransform_output_func(memory_manager),
79 _extract_output_func(),
81 _activation_layer_func(),
91 _transformed_weights(),
92 _input_weights_product(),
95 _itransformed_output(),
98 _original_weights(nullptr),
99 _original_bias(nullptr),
100 _is_activationlayer_enabled(false),
101 _needs_permute(false),
113 bool enable_fast_math)
126 bool enable_fast_math)
130 biases !=
nullptr ? biases->
info() :
nullptr,
134 _original_weights = weights;
135 _original_bias = biases;
138 _has_bias = biases !=
nullptr;
148 const Size2D kernel_size =
150 const Size2D pad_valid =
Size2D(pad_decomposable(input_dims.
x() + kernel_size.
x() - 1),
151 pad_decomposable(input_dims.
y() + kernel_size.
y() - 1));
154 const ICLTensor *weights_to_use = weights;
155 ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
158 if (biases !=
nullptr)
168 _memory_group.
manage(&_permuted_input);
177 input_to_use = &_permuted_input;
178 weights_to_use = &_permuted_weights;
182 _flipped_weights.
allocator()->
init(weights_to_use->
info()->
clone()->set_is_resizable(
true).reset_padding());
184 _flip_weights_func.
configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis,
188 const PaddingList padding_w = {{0, input_dims.
x() + pad_valid.
x() - 1}, {0, input_dims.
y() + pad_valid.
y() - 1}};
189 _pad_weights_func.
configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
192 _transform_weights_func = std::make_unique<CLFFT2D>();
193 _transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights,
FFT2DInfo());
196 const PaddingList padding_in = {{0, kernel_size.
x() + pad_valid.
x() - 1}, {0, kernel_size.
y() + pad_valid.
y() - 1}};
197 _memory_group.
manage(&_padded_input);
198 _pad_input_func.
configure(compile_context, input_to_use, &_padded_input, padding_in);
205 _memory_group.
manage(&_transformed_input);
206 _transform_input_func.
configure(compile_context, &_padded_input, &_transformed_input,
FFT2DInfo());
210 _memory_group.
manage(&_output_product);
211 _prod_func.
configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
215 _memory_group.
manage(&_output_reduced);
220 _memory_group.
manage(&_itransformed_output);
224 _output_reduced.
info()->
clone()->set_is_resizable(
true).set_num_channels(1).reset_padding());
225 _itransform_output_func.
configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
231 _reshaped_output.
allocator()->
init(_itransformed_output.
info()->
clone()->set_tensor_shape(reshaped_shape));
234 const int start_left = kernel_size.
x() -
conv_info.pad_left() - 1;
235 const int start_top = kernel_size.
y() -
conv_info.pad_top() - 1;
236 const int end_right =
238 const int end_botton =
242 _memory_group.
manage(&_bias_output);
244 else if (_needs_permute)
246 output_to_use = &_permuted_output;
247 _memory_group.
manage(&_permuted_output);
249 _extract_output_func.
configure(compile_context, &_reshaped_output, output_to_use,
254 if (biases !=
nullptr)
256 output_to_use = output;
259 output_to_use = &_permuted_output;
260 _memory_group.
manage(&_permuted_output);
279 _is_activationlayer_enabled =
act_info.enabled();
280 if (_is_activationlayer_enabled)
287 _flip_axis.
map(
true);
288 auto axis_data =
reinterpret_cast<uint32_t *
>(_flip_axis.
buffer());
300 bool enable_fast_math)
318 conv_info.pad_right() != (kernel_size.
x() / 2));
320 conv_info.pad_bottom() != (kernel_size.
y() / 2));
323 if (biases !=
nullptr)
330 if ((output !=
nullptr) && (output->
total_size() != 0))
355 _permute_input_func.
run();
357 _pad_input_func.
run();
358 _transform_input_func.
run();
365 _itransform_output_func.
run();
367 _extract_output_func.
run();
371 _bias_add_func.
run();
375 _permute_output_func.
run();
379 if (_is_activationlayer_enabled)
381 _activation_layer_func.
run();
390 if (_original_bias !=
nullptr)
393 _permute_bias_func.
run();
397 const ICLTensor *cur_weights = _original_weights;
404 _permute_weights_func.
run();
406 cur_weights = &_permuted_weights;
411 _flip_weights_func.
run();
416 _pad_weights_func.
run();
423 _transform_weights_func->run();
427 _transform_weights_func.reset();
@ NCHW
Num samples, channels, height, width.
void run() override
Run the kernels contained in the function.
std::unique_ptr< ITensorInfo > clone() const override
size_t y() const
Semantic accessor for height as y.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
void run() override
Run the kernels contained in the function.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::vector< PaddingInfo > PaddingList
List of padding information.
@ NHWC
Num samples, height, width, channels.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
void run() override
Run the kernels contained in the function.
static std::set< unsigned int > supported_radix()
Returns the radices that are supported by the FFT kernel.
Interface for OpenCL tensor.
Class for specifying the size of an image or rectangle.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Activation Layer Information class.
FFTDirection direction
Direction of the FFT.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
Strides PermutationVector
Permutation vector.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
@ U32
unsigned 32-bit number
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void run() override
Run the kernels contained in the function.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
void mark_as_unused() const
Marks a tensor as unused.
ITensorInfo & set_data_layout(const DataLayout &data_layout) override
Set the data layout of the tensor.
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Set the input and output tensors.
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
const cl::Buffer & cl_buffer() const override
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
void run() override
Run the kernels contained in the function.
T x() const
Alias to access the size of the first dimension.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
size_t x() const
Semantic accessor for width as x.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
Basic pool of threads to execute CPP/Neon code on several cores in parallel.
Descriptor used by the FFT2D function.
static CLScheduler & get()
Access the scheduler singleton.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Store the tensor's metadata.
bool is_used() const
Flags if the tensor is used or not.
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis, bool use_inverted_axis)
Initialize the function.
void free() override
Free allocated OpenCL memory.
void prepare() override
Prepare the function for executing.
Memory group resources scope handling class.
Copyright (c) 2017-2024 Arm Limited.
void remove_dimension(size_t n, bool apply_dim_correction=true)
Accessor to remove the dimension n from the tensor shape.
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
@ F16
16-bit floating-point number
void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value=PixelValue(), PaddingMode mode=PaddingMode::CONSTANT)
Initialize the function.
void run() override
Run the kernels contained in the function.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Store the tensor's metadata.
CLFFTConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
@ F32
32-bit floating-point number
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
T y() const
Alias to access the size of the second dimension.
#define ARM_COMPUTE_LOG_PARAMS(...)
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
Initialise the function's source, destinations and border mode.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
void run() override final
Run the kernels contained in the function.