50 if((output !=
nullptr) && (output->total_size() != 0))
58 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
72 output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
75 return std::make_pair(Status{}, win);
79 template <
typename ScalarType,
int size>
80 void NEMeanStdDevNormalizationKernel::mean_stddev_normalization(
const Window &window)
82 using ExactTagType =
typename wrapper::traits::neon_vector<ScalarType, size>::tag_type;
88 const int window_step_x = size;
89 const auto window_start_x =
static_cast<int>(window.x().start());
90 const auto window_end_x =
static_cast<int>(window.x().end());
92 Iterator
input(_input, win);
93 Iterator output(_output, win);
97 int x = window_start_x;
98 auto in_ptr =
reinterpret_cast<const ScalarType *
>(input.ptr());
99 auto out_ptr =
reinterpret_cast<ScalarType *
>(output.ptr());
101 auto sum_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
102 auto sum_sq_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
104 for(; x <= (window_end_x - window_step_x); x += window_step_x)
113 for(
int i = 0; i < size / 4; ++i)
116 sum_sq_carry_res =
wrapper::vpadd(sum_sq_carry_res, sum_sq_carry_res);
123 for(; x < window_end_x; ++x)
125 ScalarType data = *(in_ptr + x);
127 sum_sq += data * data;
131 ScalarType var = (sum_sq / _input->
info()->
dimension(0)) - (mean * mean);
132 ScalarType stddev_inv = 1.f / sqrt(var + _epsilon);
136 for(x = window_start_x; x <= (window_end_x - window_step_x); x += window_step_x)
143 for(; x < window_end_x; ++x)
145 *(out_ptr + x) = (*(in_ptr + x) - mean) * stddev_inv;
152 : _input(nullptr), _output(nullptr), _epsilon(1e-8f), _func(nullptr)
163 _output = (output ==
nullptr) ? input : output;
167 auto win_config = validate_and_configure_window(input->
info(), (output ==
nullptr) ?
nullptr : output->
info());
169 ICPPKernel::configure(win_config.second);
176 _func = &NEMeanStdDevNormalizationKernel::mean_stddev_normalization<float, 4>;
178 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 180 _func = &NEMeanStdDevNormalizationKernel::mean_stddev_normalization<float16_t, 8>;
182 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 203 (this->*_func)(window);
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
NEMeanStdDevNormalizationKernel()
Default constructor.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
uint8x16_t vloadq(const uint8_t *ptr)
DATA_TYPE sum(__global const DATA_TYPE *input)
Calculate sum of a vector.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
uint8x8_t vpadd(const uint8x8_t &a, const uint8x8_t &b)
uint8_t vgetlane(const uint8x8_t vector, const unsigned int lane)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
uint8x8_t vgetlow(const uint8x16_t val)
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
uint8x8_t vgethigh(const uint8x16_t val)
static Status validate(const ITensorInfo *input, const ITensorInfo *output=nullptr, float epsilon=1e-8f)
Static function to check if given info will lead to a valid configuration of NEMeanStdDevNormalizationKernel.
void configure(ITensor *input, ITensor *output=nullptr, float epsilon=1e-8f)
Initialise the kernel's input and outputs.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
void vstore(uint8_t *ptr, uint8x8_t val)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Includes all wrapper headers at once.
DataType
Available data types.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)