40 Status
validate_arguments(
const ITensorInfo *
input,
const ITensorInfo *output,
const ITensorInfo *idx,
const FFTDigitReverseKernelInfo &config)
49 if((output !=
nullptr) && (output->total_size() != 0))
59 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *
input, ITensorInfo *output, ITensorInfo *idx,
const FFTDigitReverseKernelInfo &config)
67 return std::make_pair(Status{}, win);
72 : _func(nullptr), _input(nullptr), _output(nullptr), _idx(nullptr)
85 const size_t axis = config.
axis;
87 const bool is_input_complex = (
input->info()->num_channels() == 2);
90 auto win_config = validate_and_configure_window(
input->info(), output->
info(), idx->
info(), config);
92 INEKernel::configure(win_config.second);
100 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, true>;
104 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, false>;
109 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<false, false>;
118 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, true>;
122 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, false>;
127 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<false, false>;
143 template <
bool is_input_complex,
bool is_conj>
144 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0(
const Window &window)
149 std::vector<unsigned int> buffer_idx(
N);
150 std::copy_n(reinterpret_cast<unsigned int *>(_idx->
buffer()),
N, buffer_idx.data());
159 std::vector<float> buffer_row_out(2 *
N);
160 std::vector<float> buffer_row_in(2 *
N);
167 memcpy(buffer_row_in.data(), reinterpret_cast<float *>(in.ptr()), 2 *
N *
sizeof(
float));
170 for(
size_t x = 0; x < 2 *
N; x += 2)
172 size_t idx = buffer_idx[x / 2];
173 buffer_row_out[x] = buffer_row_in[2 * idx];
174 buffer_row_out[x + 1] = (is_conj ? -buffer_row_in[2 * idx + 1] : buffer_row_in[2 * idx + 1]);
180 memcpy(buffer_row_in.data(), reinterpret_cast<float *>(in.ptr()),
N *
sizeof(
float));
183 for(
size_t x = 0; x <
N; ++x)
185 size_t idx = buffer_idx[x];
186 buffer_row_out[2 * x] = buffer_row_in[idx];
191 memcpy(reinterpret_cast<float *>(out.ptr()), buffer_row_out.data(), 2 *
N *
sizeof(float));
196 template <
bool is_input_complex,
bool is_conj>
197 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1(
const Window &window)
203 std::vector<unsigned int> buffer_idx(Ny);
204 std::copy_n(reinterpret_cast<unsigned int *>(_idx->
buffer()), Ny, buffer_idx.data());
209 Iterator out(_output,
slice);
212 std::vector<float> buffer_row(Nx);
220 auto *out_ptr = reinterpret_cast<float *>(out.ptr());
221 auto *in_ptr = reinterpret_cast<float *>(_input->
buffer() +
id.z() * stride_z +
id[3] * stride_w);
222 const size_t y_shuffled = buffer_idx[
id.y()];
227 memcpy(out_ptr, in_ptr + 2 * Nx * y_shuffled, 2 * Nx *
sizeof(
float));
232 for(
size_t x = 0; x < 2 * Nx; x += 2)
234 out_ptr[x + 1] = -out_ptr[x + 1];
241 memcpy(buffer_row.data(), in_ptr + Nx * y_shuffled, Nx *
sizeof(float));
244 for(
size_t x = 0; x < 2 * Nx; x += 2)
246 out_ptr[x] = buffer_row[x / 2];
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
const Window & window() const
The maximum window the kernel can be executed on.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
1 channel, 1 F32 per channel
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
Static function to check if given info will lead to a valid configuration of NEFFTDigitReverseKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
NEFFTDigitReverseKernel()
Constructor.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
1 channel, 1 U32 per channel
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory.
bool conjugate
Flag to conjugate the output.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
unsigned int axis
Axis to perform the kernel on.
void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config)
Set the input and output tensors.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Descriptor for FFT digit reverse kernels.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Iterator updated by execute_window_loop for each window element.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)