Compute Library
 23.08
DFT.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "DFT.h"
25 
26 #include "PadLayer.h"
27 #include "Permute.h"
28 #include "Reverse.h"
29 #include "SliceOperations.h"
31 
32 #include <cmath>
33 
34 namespace arm_compute
35 {
36 namespace test
37 {
38 namespace validation
39 {
40 namespace reference
41 {
42 namespace
43 {
44 /** Performs an one dimensional DFT on a given real sequence.
45  *
46  * @param[in] src_ptr Pointer to the real input sequence.
47  * @param[in] N Size of input sequence.
48  * @param[out] dst_ptr Pointer to the complex output sequence.
49  * @param[out] K Size of the output sequence
50  */
51 template <typename T>
52 void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
53 {
54 #if defined(_OPENMP)
55  #pragma omp parallel for
56 #endif /* _OPENMP */
57  for(unsigned int k = 0; k < K; ++k)
58  {
59  float Xr = 0;
60  float Xi = 0;
61  for(unsigned int n = 0; n < N; ++n)
62  {
63  const float alpha = (2 * M_PI * k * n) / N;
64  const float val_r = src_ptr[n];
65  // Assuming DFT from the R domain thus skipping imaginary calculations
66  Xr += val_r * cos(alpha);
67  Xi -= val_r * sin(alpha);
68  }
69 
70  dst_ptr[k * 2] = Xr;
71  dst_ptr[k * 2 + 1] = Xi;
72  }
73 }
74 
75 /** Performs an one dimensional DFT on a given complex sequence.
76  *
77  * @param[in] src_ptr Pointer to the complex input sequence.
78  * @param[out] dst_ptr Pointer to the complex output sequence.
79  * @param[in] N Size of the sequences
80  */
81 template <typename T>
82 void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
83 {
84 #if defined(_OPENMP)
85  #pragma omp parallel for
86 #endif /* _OPENMP */
87  for(unsigned int k = 0; k < N; ++k)
88  {
89  float Xr = 0;
90  float Xi = 0;
91  for(unsigned int n = 0; n < N; ++n)
92  {
93  const float alpha = (2 * M_PI * k * n) / N;
94  const float val_r = src_ptr[2 * n];
95  const float val_i = src_ptr[2 * n + 1];
96  const float cos_alpha = cos(alpha);
97  const float sin_alpha = sin(alpha);
98 
99  Xr += val_r * cos_alpha + val_i * sin_alpha;
100  Xi += val_i * cos_alpha - val_r * sin_alpha;
101  }
102 
103  dst_ptr[k * 2] = Xr;
104  dst_ptr[k * 2 + 1] = Xi;
105  }
106 }
107 
108 /** Performs an one dimensional inverse DFT on a given real sequence.
109  *
110  * @param[in] src_ptr Pointer to the real input sequence.
111  * @param[in] K Size of input sequence.
112  * @param[out] dst_ptr Pointer to the complex output sequence.
113  * @param[out] N Size of the output sequence
114  */
115 template <typename T>
116 void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
117 {
118  const bool is_odd = N % 2;
119  const unsigned int Nleft = N - K;
120  const int tail_start = is_odd ? K - 1 : K - 2;
121 #if defined(_OPENMP)
122  #pragma omp parallel for
123 #endif /* _OPENMP */
124  for(unsigned int n = 0; n < N; ++n)
125  {
126  float xr = 0;
127  for(unsigned int k = 0; k < K; ++k)
128  {
129  const float alpha = (2 * M_PI * k * n) / N;
130  xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
131  }
132 
133  unsigned int j = tail_start;
134  for(unsigned int k = 0; k < Nleft; ++k)
135  {
136  const float alpha = (2 * M_PI * (k + K) * n) / N;
137  xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
138  --j;
139  }
140 
141  dst_ptr[n] = xr;
142  }
143 }
144 
145 /** Performs an one dimensional inverse DFT on a given complex sequence.
146  *
147  * @param[in] src_ptr Pointer to the complex input sequence.
148  * @param[out] dst_ptr Pointer to the complex output sequence.
149  * @param[in] N Size of the sequences
150  */
151 template <typename T>
152 void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
153 {
154 #if defined(_OPENMP)
155  #pragma omp parallel for
156 #endif /* _OPENMP */
157  for(unsigned int n = 0; n < N; ++n)
158  {
159  float xr = 0;
160  float xi = 0;
161  for(unsigned int k = 0; k < N; ++k)
162  {
163  const float alpha = (2 * M_PI * k * n) / N;
164  const float cos_alpha = cos(alpha);
165  const float sin_alpha = sin(alpha);
166  const float val_r = src_ptr[2 * k];
167  const float val_i = src_ptr[2 * k + 1];
168 
169  xr += val_r * cos_alpha - val_i * sin_alpha;
170  xi += val_i * cos_alpha + val_r * sin_alpha;
171  }
172 
173  dst_ptr[2 * n] = xr;
174  dst_ptr[2 * n + 1] = xi;
175  }
176 }
177 
178 template <typename T>
179 SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
180 {
181  // Performs only rdft
182  ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
183  ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
184 
185  const unsigned int inverse_tail = is_odd ? 1 : 0;
186  const unsigned int N = src.shape()[0];
187  const unsigned int K = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
188  const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
189 
190  TensorShape dst_shape = src.shape();
191  dst_shape.set(0, K);
192 
193  SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
194 
195  const unsigned int upper_dims = src.shape().total_size_upper(1);
196 #if defined(_OPENMP)
197  #pragma omp parallel for
198 #endif /* _OPENMP */
199  for(unsigned int du = 0; du < upper_dims; ++du)
200  {
201  const T *src_row_ptr = src.data() + du * N * src.num_channels();
202  T *dst_row_ptr = dst.data() + du * K * dst.num_channels();
203  direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
204  }
205 
206  return dst;
207 }
208 
209 template <typename T>
210 SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
211 {
212  ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
213 
214  const unsigned int N = src.shape()[0];
215 
216  SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
217 
218  const unsigned int upper_dims = src.shape().total_size_upper(1);
219 #if defined(_OPENMP)
220  #pragma omp parallel for
221 #endif /* _OPENMP */
222  for(unsigned int du = 0; du < upper_dims; ++du)
223  {
224  const T *src_row_ptr = src.data() + du * N * src.num_channels();
225  T *dst_row_ptr = dst.data() + du * N * dst.num_channels();
226  direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
227  }
228 
229  return dst;
230 }
231 
232 /** Scale a tensor by a given scaling factor.
233  *
234  * @param[in,out] tensor Tensor to scale.
235  * @param[in] scaling_factor Scaling to scale the tensor data with.
236  */
237 template <typename T>
238 void scale(SimpleTensor<T> &tensor, T scaling_factor)
239 {
240  const int total_elements = tensor.num_elements() * tensor.num_channels();
241  T *data_ptr = tensor.data();
242 #if defined(_OPENMP)
243  #pragma omp parallel for
244 #endif /* _OPENMP */
245  for(int i = 0; i < total_elements; ++i)
246  {
247  data_ptr[i] /= scaling_factor;
248  }
249 }
250 
251 /** Performs a complex element-wise multiplication with reduction across the channels axis.
252  *
253  * @param[in] input Input tensor.
254  * @param[in] weights Weights tensor.
255  *
256  * @return Output tensor.
257  */
258 template <typename T>
259 SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
260 {
261  const uint32_t W = input.shape().x();
262  const uint32_t H = input.shape().y();
263  const uint32_t Ci = input.shape().z();
264  const uint32_t Co = weights.shape()[3];
265  const uint32_t N = input.shape().total_size() / (W * H * Ci);
266 
267  TensorShape output_shape = input.shape();
268  output_shape.set(2, Co);
269  SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
270 
271  // dst memory to zero
272  const auto total_element_count = dst.num_channels() * dst.num_elements();
273  std::fill_n(dst.data(), total_element_count, 0);
274 
275  for(uint32_t b = 0; b < N; ++b)
276  {
277  for(uint32_t co = 0; co < Co; ++co)
278  {
279  for(uint32_t ci = 0; ci < Ci; ++ci)
280  {
281  for(uint32_t h = 0; h < H; ++h)
282  {
283  for(uint32_t w = 0; w < W; ++w)
284  {
285  const uint32_t i_index = w + h * W + ci * H * W + b * H * W * Ci;
286  const uint32_t w_index = w + h * W + ci * H * W + co * H * W * Ci;
287  const uint32_t o_index = w + h * W + co * H * W + b * H * W * Co;
288  const Coordinates i_coords = index2coords(input.shape(), i_index);
289  const Coordinates w_coords = index2coords(weights.shape(), w_index);
290  const Coordinates o_coords = index2coords(dst.shape(), o_index);
291 
292  auto i_ptr = static_cast<const T *>(input(i_coords));
293  auto w_ptr = static_cast<const T *>(weights(w_coords));
294  auto o_ptr = static_cast<T *>(dst(o_coords));
295 
296  const T Rin = i_ptr[0];
297  const T Iin = i_ptr[1];
298  const T Rw = w_ptr[0];
299  const T Iw = w_ptr[1];
300 
301  o_ptr[0] += Rin * Rw - Iin * Iw;
302  o_ptr[1] += Rin * Iw + Rw * Iin;
303  }
304  }
305  }
306  }
307  }
308  return dst;
309 }
310 } // namespace
311 
312 template <typename T>
314 {
315  return rdft_1d_core(src, FFTDirection::Forward, false);
316 }
317 
318 template <typename T>
320 {
321  auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
322 
323  const T scaling_factor = T(dst.shape()[0]);
324  scale(dst, scaling_factor);
325 
326  return dst;
327 }
328 
329 template <typename T>
331 {
332  auto dst = dft_1d_core(src, direction);
333  if(direction == FFTDirection::Inverse)
334  {
335  const T scaling_factor = T(dst.shape()[0]);
336  scale(dst, scaling_factor);
337  }
338  return dst;
339 }
340 
341 template <typename T>
343 {
344  ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
345  constexpr FFTDirection direction = FFTDirection::Forward;
346 
347  auto first_pass = rdft_1d_core(src, direction, false);
348  auto transposed = permute(first_pass, PermutationVector(1U, 0U));
349  auto second_pass = dft_1d_core(transposed, direction);
350  return permute(second_pass, PermutationVector(1U, 0U));
351 }
352 
353 template <typename T>
355 {
356  ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
357  constexpr FFTDirection direction = FFTDirection::Inverse;
358 
359  auto transposed = permute(src, PermutationVector(1U, 0U));
360  auto first_pass = dft_1d_core(transposed, direction);
361  auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
362  auto dst = rdft_1d_core(transposed_2, direction, is_odd);
363 
364  const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
365  scale(dst, scaling_factor);
366  return dst;
367 }
368 
369 template <typename T>
371 {
372  ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
373 
374  if(direction == FFTDirection::Forward)
375  {
376  auto first_pass = dft_1d_core(src, direction);
377  auto transposed = permute(first_pass, PermutationVector(1U, 0U));
378  auto second_pass = dft_1d_core(transposed, direction);
379  return permute(second_pass, PermutationVector(1U, 0U));
380  }
381  else
382  {
383  auto transposed = permute(src, PermutationVector(1U, 0U));
384  auto first_pass = dft_1d_core(transposed, direction);
385  auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
386  auto dst = dft_1d_core(transposed_2, direction);
387 
388  const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
389  scale(dst, scaling_factor);
390 
391  return dst;
392  }
393 }
394 
395 template <typename T>
397 {
398  // Pad input to full padding
399  const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
400  auto padded_src = pad_layer(src, padding_in);
401 
402  // Flip weights
403  std::vector<uint32_t> axis_v = { 0, 1 };
405  std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
406  auto flipped_w = reverse(w, axis);
407 
408  // Pad weights to have the same size as input
409  const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
410  auto padded_w = pad_layer(flipped_w, paddings_w);
411 
412  // Transform input and weights to frequency domain
413  auto Fsrc = rdft_2d(padded_src);
414  auto Fw = rdft_2d(padded_w);
415 
416  // Perform dot product
417  auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
418 
419  // Transform output back to frequency domain
420  auto conv_res = ridft_2d(Fdst);
421 
422  // Slice output
423  const int start_left = w.shape().x() - conv_info.pad_left() - 1;
424  const int start_top = w.shape().y() - conv_info.pad_top() - 1;
425  const int end_right = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
426  const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
427  return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
428 }
429 
430 // FP32
434 
438 
440 
441 // FP16
444 template SimpleTensor<half> dft_1d(const SimpleTensor<half> &src, FFTDirection direction);
445 
448 template SimpleTensor<half> dft_2d(const SimpleTensor<half> &src, FFTDirection direction);
449 
451 } // namespace reference
452 } // namespace validation
453 } // namespace test
454 } // namespace arm_compute
arm_compute::test::validation::dst_shape
TensorShape dst_shape
Definition: DFT.cpp:164
arm_compute::test::validation::reference::rdft_2d
SimpleTensor< T > rdft_2d(const SimpleTensor< T > &src)
Performs a two dimensional DFT on a real input.
Definition: DFT.cpp:342
arm_compute::test::validation::reference::rdft_1d
SimpleTensor< T > rdft_1d(const SimpleTensor< T > &src)
Performs an one dimensional DFT on a real input.
Definition: DFT.cpp:313
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
N
unsigned int N
Definition: CpuGemmAssemblyDispatch.cpp:96
arm_compute::PaddingList
std::vector< PaddingInfo > PaddingList
List of padding information.
Definition: Types.h:413
SliceOperations.h
arm_compute::test::validation::output_shape
const auto output_shape
Definition: ConvolutionLayer.cpp:411
arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::test::validation::reference::ridft_1d
SimpleTensor< T > ridft_1d(const SimpleTensor< T > &src, bool is_odd)
Performs an one dimensional inverse DFT on a real input.
Definition: DFT.cpp:319
arm_compute::test::validation::reference::FFTDirection::Inverse
@ Inverse
M_PI
#define M_PI
Definition: ToolchainSupport.h:40
arm_compute::index2coords
Coordinates index2coords(const TensorShape &shape, int index)
Convert a linear index into n-dimensional coordinates.
Definition: Helpers.inl:166
arm_compute::test::validation::k
const unsigned int k
Definition: GEMMMatrixMultiplyNative.cpp:361
Reverse.h
Permute.h
arm_compute::test::validation::reference::pad_layer
SimpleTensor< T > pad_layer(const SimpleTensor< T > &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode)
Reference function to pad an ND tensor.
Definition: PadLayer.cpp:39
arm_compute::utils::cast::U
U
Definition: SaturateCast.h:64
arm_compute::PermutationVector
Strides PermutationVector
Permutation vector.
Definition: CoreTypes.h:37
ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:467
arm_compute::DataType::U32
@ U32
unsigned 32-bit number
arm_compute::test::validation::reference::dft_2d
SimpleTensor< T > dft_2d(const SimpleTensor< T > &src, FFTDirection direction)
Performs a two dimensional DFT on a complex input.
Definition: DFT.cpp:370
PadLayer.h
arm_compute::FFTDirection
FFTDirection
FFT direction to use.
Definition: FunctionDescriptors.h:35
arm_compute::test::validation::w
SimpleTensor< float > w
Definition: DFT.cpp:156
arm_compute::test::validation::reference::scale
SimpleTensor< T > scale(const SimpleTensor< T > &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
Definition: Scale.cpp:184
arm_compute::test::validation::reference::dft_1d
SimpleTensor< T > dft_1d(const SimpleTensor< T > &src, FFTDirection direction)
Performs an one dimensional DFT on a complex input.
Definition: DFT.cpp:330
arm_compute::test::validation::reference::copy
SimpleTensor< T > copy(const SimpleTensor< T > &src, const TensorShape &output_shape)
Definition: Copy.cpp:37
K
unsigned int K
Definition: CpuGemmAssemblyDispatch.cpp:97
arm_compute::test::validation::reference::conv2d_dft
SimpleTensor< T > conv2d_dft(const SimpleTensor< T > &src, const SimpleTensor< T > &w, const PadStrideInfo &conv_info)
Performs and DFT based convolution on a real input.
Definition: DFT.cpp:396
ci
const CPUInfo & ci
Definition: NEBatchNormalizationLayerKernel.cpp:52
arm_compute::Coordinates
Coordinates of an item.
Definition: Coordinates.h:37
tensor
CLTensor * tensor
Pointer to the auxiliary tensor.
Definition: ClWorkloadRuntime.cpp:66
arm_compute::test::validation::reference::FFTDirection
FFTDirection
Definition: DFT.h:38
arm_compute::test::validation::reference::ridft_2d
SimpleTensor< T > ridft_2d(const SimpleTensor< T > &src, bool is_odd)
Performs a two dimensional inverse DFT on a real input.
Definition: DFT.cpp:354
arm_compute::PadStrideInfo
Definition: CoreTypes.h:138
arm_compute::test::validation::reference::FFTDirection::Forward
@ Forward
arm_compute::test::SimpleTensor
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: SimpleTensor.h:58
arm_compute::test::validation::b
SimpleTensor< float > b
Definition: DFT.cpp:157
ToolchainSupport.h
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::validation::reference::reverse
SimpleTensor< T > reverse(const SimpleTensor< T > &src, const SimpleTensor< uint32_t > &axis)
Definition: Reverse.cpp:38
arm_compute::test::validation::conv_info
const auto conv_info
Definition: ConvolutionLayer.cpp:407
arm_compute::test::validation::is_odd
const bool is_odd
Definition: DFT.cpp:120
DFT.h
arm_compute::TensorShape::set
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
arm_compute::test::validation::reference::slice
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
Definition: SliceOperations.cpp:38
arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486
arm_compute::test::validation::n
const unsigned int n
Definition: GEMMMatrixMultiplyNative.cpp:360
arm_compute::test::validation::reference::permute
SimpleTensor< T > permute(const SimpleTensor< T > &src, PermutationVector perm)
Definition: Permute.cpp:38