Compute Library
 21.02
CLFFT1D.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
33 
34 namespace arm_compute
35 {
36 CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
37  : _memory_group(std::move(memory_manager)),
38  _digit_reverse_kernel(std::make_unique<CLFFTDigitReverseKernel>()),
39  _fft_kernels(),
40  _scale_kernel(std::make_unique<CLFFTScaleKernel>()),
41  _digit_reversed_input(),
42  _digit_reverse_indices(),
43  _num_ffts(0),
44  _run_scale(false)
45 {
46 }
47 
48 CLFFT1D::~CLFFT1D() = default;
49 
50 void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
51 {
52  configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
53 }
54 
55 void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
56 {
57  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
58  ARM_COMPUTE_ERROR_THROW_ON(CLFFT1D::validate(input->info(), output->info(), config));
59 
60  // Decompose size to radix factors
61  const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
62  const unsigned int N = input->info()->tensor_shape()[config.axis];
63  const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix);
64  ARM_COMPUTE_ERROR_ON(decomposed_vector.empty());
65 
66  // Flags
67  _run_scale = config.direction == FFTDirection::Inverse;
68  const bool is_c2r = input->info()->num_channels() == 2 && output->info()->num_channels() == 1;
69 
70  // Configure digit reverse
71  FFTDigitReverseKernelInfo digit_reverse_config;
72  digit_reverse_config.axis = config.axis;
73  digit_reverse_config.conjugate = config.direction == FFTDirection::Inverse;
74  TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
75  _digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
76  _memory_group.manage(&_digit_reversed_input);
77  _digit_reverse_kernel->configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
78 
79  // Create and configure FFT kernels
80  unsigned int Nx = 1;
81  _num_ffts = decomposed_vector.size();
82  _fft_kernels.reserve(_num_ffts);
83  for(unsigned int i = 0; i < _num_ffts; ++i)
84  {
85  const unsigned int radix_for_stage = decomposed_vector.at(i);
86 
87  FFTRadixStageKernelInfo fft_kernel_info;
88  fft_kernel_info.axis = config.axis;
89  fft_kernel_info.radix = radix_for_stage;
90  fft_kernel_info.Nx = Nx;
91  fft_kernel_info.is_first_stage = (i == 0);
92  _fft_kernels.emplace_back(std::make_unique<CLFFTRadixStageKernel>());
93  _fft_kernels.back()->configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
94 
95  Nx *= radix_for_stage;
96  }
97 
98  // Configure scale kernel
99  if(_run_scale)
100  {
101  FFTScaleKernelInfo scale_config;
102  scale_config.scale = static_cast<float>(N);
103  scale_config.conjugate = config.direction == FFTDirection::Inverse;
104  is_c2r ? _scale_kernel->configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
105  }
106 
107  // Allocate tensors
108  _digit_reversed_input.allocator()->allocate();
109  _digit_reverse_indices.allocator()->allocate();
110 
111  // Init digit reverse indices
112  const auto digit_reverse_cpu = arm_compute::helpers::fft::digit_reverse_indices(N, decomposed_vector);
113  _digit_reverse_indices.map(CLScheduler::get().queue(), true);
114  std::copy_n(digit_reverse_cpu.data(), N, reinterpret_cast<unsigned int *>(_digit_reverse_indices.buffer()));
115  _digit_reverse_indices.unmap(CLScheduler::get().queue());
116 }
117 
118 Status CLFFT1D::validate(const ITensorInfo *input, const ITensorInfo *output, const FFT1DInfo &config)
119 {
122  ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2);
123  ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
124 
125  // Check if FFT is decomposable
126  const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
127  const unsigned int N = input->tensor_shape()[config.axis];
128  const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix);
129  ARM_COMPUTE_RETURN_ERROR_ON(decomposed_vector.empty());
130 
131  // Checks performed when output is configured
132  if((output != nullptr) && (output->total_size() != 0))
133  {
134  ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() == 1 && input->num_channels() == 1);
135  ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 1 && output->num_channels() != 2);
138  }
139 
140  return Status{};
141 }
142 
144 {
145  MemoryGroupResourceScope scope_mg(_memory_group);
146 
147  // Run digit reverse
148  CLScheduler::get().enqueue(*_digit_reverse_kernel, false);
149 
150  // Run radix kernels
151  for(unsigned int i = 0; i < _num_ffts; ++i)
152  {
153  CLScheduler::get().enqueue(*_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
154  }
155 
156  // Run output scaling
157  if(_run_scale)
158  {
159  CLScheduler::get().enqueue(*_scale_kernel, true);
160  }
161 }
162 } // namespace arm_compute
unsigned int axis
Axis to run the FFT on.
Shape of a tensor.
Definition: TensorShape.h:39
void configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
Initialise the function's source, destinations and border mode.
Definition: CLFFT1D.cpp:50
static CLScheduler & get()
Access the scheduler singleton.
std::vector< unsigned int > decompose_stages(unsigned int N, const std::set< unsigned int > &supported_factors)
Decompose a given 1D input size using the provided supported factors.
Definition: fft.cpp:34
Descriptor used by the FFT1D function.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFT1DInfo &config)
Static function to check if given info will lead to a valid configuration of CLFFT1D.
Definition: CLFFT1D.cpp:118
std::vector< unsigned int > digit_reverse_indices(unsigned int N, const std::vector< unsigned int > &fft_stages)
Calculate digit reverse index vector given fft size and the decomposed stages.
Definition: fft.cpp:79
~CLFFT1D()
Default destructor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
unsigned int axis
Axis to run the kernel on.
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
unsigned int N
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
Definition: CLTensor.cpp:66
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
Definition: ICLTensor.cpp:53
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
bool conjugate
Flag to conjugate the output.
Descriptor used by the FFT core kernels.
Descriptor for FFT scale kernels.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
CLCompileContext class.
static std::set< unsigned int > supported_radix()
Returns the radices that are supported by the FFT kernel.
unsigned int axis
Axis to perform the kernel on.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
unsigned int radix
Radix to use.
Interface for the inverse fft scale kernel.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
CLFFT1D(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default Constructor.
Definition: CLFFT1D.cpp:36
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:443
bool is_first_stage
Flags whether the FFT kernel is the first stage of a decomposed FFT.
Descriptor for FFT digit reverse kernels.
unsigned int Nx
Nx coefficient.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor's metadata.
Definition: TensorInfo.h:45
FFTDirection direction
Direction of the FFT.
float scale
Scale factor used by the FFT scale kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
Definition: Validate.h:694
void run() override
Run the kernels contained in the function.
Definition: CLFFT1D.cpp:143
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
Definition: CLTensor.cpp:71
virtual size_t num_channels() const =0
The number of channels for each tensor element.
bool conjugate
Flag to conjugate the output.
Interface for the digit reverse operation kernel.