Compute Library
 21.11
CLFFT1D.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
33 
34 #include "src/common/utils/Log.h"
35 
36 namespace arm_compute
37 {
38 CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
39  : _memory_group(std::move(memory_manager)),
40  _digit_reverse_kernel(std::make_unique<CLFFTDigitReverseKernel>()),
41  _fft_kernels(),
42  _scale_kernel(std::make_unique<CLFFTScaleKernel>()),
43  _digit_reversed_input(),
44  _digit_reverse_indices(),
45  _num_ffts(0),
46  _run_scale(false)
47 {
48 }
49 
50 CLFFT1D::~CLFFT1D() = default;
51 
52 void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
53 {
54  configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
55 }
56 
57 void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
58 {
59  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
60  ARM_COMPUTE_ERROR_THROW_ON(CLFFT1D::validate(input->info(), output->info(), config));
61  ARM_COMPUTE_LOG_PARAMS(input, output, config);
62 
63  // Decompose size to radix factors
64  const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
65  const unsigned int N = input->info()->tensor_shape()[config.axis];
66  const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix);
67  ARM_COMPUTE_ERROR_ON(decomposed_vector.empty());
68 
69  // Flags
70  _run_scale = config.direction == FFTDirection::Inverse;
71  const bool is_c2r = input->info()->num_channels() == 2 && output->info()->num_channels() == 1;
72 
73  // Configure digit reverse
74  FFTDigitReverseKernelInfo digit_reverse_config;
75  digit_reverse_config.axis = config.axis;
76  digit_reverse_config.conjugate = config.direction == FFTDirection::Inverse;
77  TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
78  _digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
79  _memory_group.manage(&_digit_reversed_input);
80  _digit_reverse_kernel->configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
81 
82  // Create and configure FFT kernels
83  unsigned int Nx = 1;
84  _num_ffts = decomposed_vector.size();
85  _fft_kernels.reserve(_num_ffts);
86  for(unsigned int i = 0; i < _num_ffts; ++i)
87  {
88  const unsigned int radix_for_stage = decomposed_vector.at(i);
89 
90  FFTRadixStageKernelInfo fft_kernel_info;
91  fft_kernel_info.axis = config.axis;
92  fft_kernel_info.radix = radix_for_stage;
93  fft_kernel_info.Nx = Nx;
94  fft_kernel_info.is_first_stage = (i == 0);
95  _fft_kernels.emplace_back(std::make_unique<CLFFTRadixStageKernel>());
96  _fft_kernels.back()->configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
97 
98  Nx *= radix_for_stage;
99  }
100 
101  // Configure scale kernel
102  if(_run_scale)
103  {
104  FFTScaleKernelInfo scale_config;
105  scale_config.scale = static_cast<float>(N);
106  scale_config.conjugate = config.direction == FFTDirection::Inverse;
107  is_c2r ? _scale_kernel->configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
108  }
109 
110  // Allocate tensors
111  _digit_reversed_input.allocator()->allocate();
112  _digit_reverse_indices.allocator()->allocate();
113 
114  // Init digit reverse indices
115  const auto digit_reverse_cpu = arm_compute::helpers::fft::digit_reverse_indices(N, decomposed_vector);
116  _digit_reverse_indices.map(CLScheduler::get().queue(), true);
117  std::copy_n(digit_reverse_cpu.data(), N, reinterpret_cast<unsigned int *>(_digit_reverse_indices.buffer()));
118  _digit_reverse_indices.unmap(CLScheduler::get().queue());
119 }
120 
121 Status CLFFT1D::validate(const ITensorInfo *input, const ITensorInfo *output, const FFT1DInfo &config)
122 {
125  ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2);
126  ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
127 
128  // Check if FFT is decomposable
129  const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
130  const unsigned int N = input->tensor_shape()[config.axis];
131  const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix);
132  ARM_COMPUTE_RETURN_ERROR_ON(decomposed_vector.empty());
133 
134  // Checks performed when output is configured
135  if((output != nullptr) && (output->total_size() != 0))
136  {
137  ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() == 1 && input->num_channels() == 1);
138  ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 1 && output->num_channels() != 2);
141  }
142 
143  return Status{};
144 }
145 
147 {
148  MemoryGroupResourceScope scope_mg(_memory_group);
149 
150  // Run digit reverse
151  CLScheduler::get().enqueue(*_digit_reverse_kernel, false);
152 
153  // Run radix kernels
154  for(unsigned int i = 0; i < _num_ffts; ++i)
155  {
156  CLScheduler::get().enqueue(*_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
157  }
158 
159  // Run output scaling
160  if(_run_scale)
161  {
162  CLScheduler::get().enqueue(*_scale_kernel, true);
163  }
164 }
165 } // namespace arm_compute
unsigned int axis
Axis to run the FFT on.
Shape of a tensor.
Definition: TensorShape.h:39
void configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
Initialise the function's source, destinations and border mode.
Definition: CLFFT1D.cpp:52
static CLScheduler & get()
Access the scheduler singleton.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
Definition: fft.cpp:34
Descriptor used by the FFT1D function.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFT1DInfo &config)
Static function to check if given info will lead to a valid configuration of CLFFT1D.
Definition: CLFFT1D.cpp:121
std::vector< unsigned int > digit_reverse_indices(unsigned int N, const std::vector< unsigned int > &fft_stages)
Calculate digit reverse index vector given fft size and the decomposed stages.
Definition: fft.cpp:79
~CLFFT1D()
Default destructor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
unsigned int axis
Axis to run the kernel on.
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
Definition: CLTensor.cpp:66
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
Definition: ICLTensor.cpp:53
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
unsigned int N
bool conjugate
Flag to conjugate the output.
Descriptor used by the FFT core kernels.
Descriptor for FFT scale kernels.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
CLCompileContext class.
static std::set< unsigned int > supported_radix()
Returns the radices that are supported by the FFT kernel.
unsigned int axis
Axis to perform the kernel on.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
unsigned int radix
Radix to use.
Interface for the inverse fft scale kernel.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
CLFFT1D(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default Constructor.
Definition: CLFFT1D.cpp:38
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:439
bool is_first_stage
Flags if the FFT kernel is the first stage of a decomposed FFT.
Descriptor for FFT digit reverse kernels.
unsigned int Nx
Nx coefficient.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
Store the tensor's metadata.
Definition: TensorInfo.h:43
FFTDirection direction
Direction of the FFT.
float scale
Scale factor used by the kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
Definition: Validate.h:690
void run() override
Run the kernels contained in the function.
Definition: CLFFT1D.cpp:146
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
Definition: CLTensor.cpp:71
virtual size_t num_channels() const =0
The number of channels for each tensor element.
bool conjugate
Flag to conjugate the output.
Interface for the digit reverse operation kernel.