50 if((output !=
nullptr) && (output->total_size() != 0))
58 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *
input, ITensorInfo *output)
71 return std::make_pair(Status{}, win);
75 template <
typename ScalarType,
int size>
76 void NEMeanStdDevNormalizationKernel::mean_stddev_normalization(
const Window &window)
78 using ExactTagType =
typename wrapper::traits::neon_vector<ScalarType, size>::tag_type;
84 const int window_step_x = size;
85 const auto window_start_x = static_cast<int>(
window.
x().
start());
86 const auto window_end_x = static_cast<int>(
window.
x().
end());
88 Iterator
input(_input, win);
89 Iterator output(_output, win);
93 int x = window_start_x;
94 auto in_ptr = reinterpret_cast<const ScalarType *>(
input.ptr());
95 auto out_ptr = reinterpret_cast<ScalarType *>(output.ptr());
97 auto sum_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
98 auto sum_sq_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
100 for(; x <= (window_end_x - window_step_x); x += window_step_x)
109 for(
int i = 0; i < size / 4; ++i)
112 sum_sq_carry_res =
wrapper::vpadd(sum_sq_carry_res, sum_sq_carry_res);
119 for(; x < window_end_x; ++x)
121 ScalarType data = *(in_ptr + x);
123 sum_sq += data * data;
127 ScalarType var = (sum_sq / _input->
info()->
dimension(0)) - (mean * mean);
128 ScalarType stddev_inv = 1.f / sqrt(var + _epsilon);
132 for(x = window_start_x; x <= (window_end_x - window_step_x); x += window_step_x)
139 for(; x < window_end_x; ++x)
141 *(out_ptr + x) = (*(in_ptr + x) - mean) * stddev_inv;
148 : _input(nullptr), _output(nullptr), _epsilon(1e-8f), _func(nullptr)
159 _output = (output ==
nullptr) ?
input : output;
163 auto win_config = validate_and_configure_window(
input->info(), (output ==
nullptr) ?
nullptr : output->
info());
165 ICPPKernel::configure(win_config.second);
172 _func = &NEMeanStdDevNormalizationKernel::mean_stddev_normalization<float, 4>;
174 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 176 _func = &NEMeanStdDevNormalizationKernel::mean_stddev_normalization<float16_t, 8>;
178 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
NEMeanStdDevNormalizationKernel()
Default constructor.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
uint8x16_t vloadq(const uint8_t *ptr)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
uint8x8_t vpadd(const uint8x8_t &a, const uint8x8_t &b)
uint8_t vgetlane(const uint8x8_t vector, const unsigned int lane)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
uint8x8_t vgetlow(const uint8x16_t val)
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
uint8x8_t vgethigh(const uint8x16_t val)
static Status validate(const ITensorInfo *input, const ITensorInfo *output=nullptr, float epsilon=1e-8f)
Static function to check if given info will lead to a valid configuration of NEMeanStdDevNormalizationKernel.
void configure(ITensor *input, ITensor *output=nullptr, float epsilon=1e-8f)
Initialise the kernel's input and outputs.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
void vstore(uint8_t *ptr, uint8x8_t val)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Includes all wrapper headers at once.
constexpr int end() const
Return the end of the dimension.
DataType
Available data types.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.