24.02.1
|
Go to the documentation of this file.
42 const ITensorInfo *output,
43 const ITensorInfo *idx,
44 const FFTDigitReverseKernelInfo &config)
53 if ((output !=
nullptr) && (output->total_size() != 0))
66 const FFTDigitReverseKernelInfo &config)
74 return std::make_pair(Status{}, win);
94 const size_t axis = config.
axis;
96 const bool is_input_complex = (
input->info()->num_channels() == 2);
101 INEKernel::configure(win_config.second);
105 if (is_input_complex)
109 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, true>;
113 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, false>;
118 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<false, false>;
123 if (is_input_complex)
127 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, true>;
131 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, false>;
136 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<false, false>;
156 template <
bool is_input_complex,
bool is_conj>
157 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0(
const Window &window)
162 std::vector<unsigned int> buffer_idx(
N);
163 std::copy_n(
reinterpret_cast<unsigned int *
>(_idx->
buffer()),
N, buffer_idx.data());
172 std::vector<float> buffer_row_out(2 *
N);
173 std::vector<float> buffer_row_in(2 *
N);
179 if (is_input_complex)
182 memcpy(buffer_row_in.data(),
reinterpret_cast<float *
>(in.ptr()), 2 *
N *
sizeof(
float));
185 for (
size_t x = 0; x < 2 *
N; x += 2)
187 size_t idx = buffer_idx[x / 2];
188 buffer_row_out[x] = buffer_row_in[2 * idx];
189 buffer_row_out[x + 1] = (is_conj ? -buffer_row_in[2 * idx + 1] : buffer_row_in[2 * idx + 1]);
195 memcpy(buffer_row_in.data(),
reinterpret_cast<float *
>(in.ptr()),
N *
sizeof(
float));
198 for (
size_t x = 0; x <
N; ++x)
200 size_t idx = buffer_idx[x];
201 buffer_row_out[2 * x] = buffer_row_in[idx];
206 memcpy(
reinterpret_cast<float *
>(out.ptr()), buffer_row_out.data(), 2 *
N *
sizeof(
float));
211 template <
bool is_input_complex,
bool is_conj>
212 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1(
const Window &window)
218 std::vector<unsigned int> buffer_idx(Ny);
219 std::copy_n(
reinterpret_cast<unsigned int *
>(_idx->
buffer()), Ny, buffer_idx.data());
224 Iterator out(_output,
slice);
227 std::vector<float> buffer_row(Nx);
235 [&](
const Coordinates &
id)
237 auto *out_ptr =
reinterpret_cast<float *
>(out.ptr());
238 auto *in_ptr =
reinterpret_cast<float *
>(_input->
buffer() +
id.z() * stride_z +
id[3] * stride_w);
239 const size_t y_shuffled = buffer_idx[
id.y()];
241 if (is_input_complex)
244 memcpy(out_ptr, in_ptr + 2 * Nx * y_shuffled, 2 * Nx *
sizeof(
float));
249 for (
size_t x = 0; x < 2 * Nx; x += 2)
251 out_ptr[x + 1] = -out_ptr[x + 1];
258 memcpy(buffer_row.data(), in_ptr + Nx * y_shuffled, Nx *
sizeof(
float));
261 for (
size_t x = 0; x < 2 * Nx; x += 2)
263 out_ptr[x] = buffer_row[x / 2];
NEFFTDigitReverseKernel()
Constructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
Static function to check if given info will lead to a valid configuration of NEFFTDigitReverseKernel.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Descriptor for FFT digit reverse kernels.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Interface for CPU tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int axis
Axis to perform the kernel on.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
@ U32
unsigned 32-bit number
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
std::pair< Status, Window > validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst)
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
Describe a multidimensional execution window.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Copyright (c) 2017-2024 Arm Limited.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
bool conjugate
Flag to conjugate the output.
Store the tensor's metadata.
@ F32
32-bit floating-point number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config)
Set the input and output tensors.
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory.