ComputeLibrary/v21.05/_n_e_normalization_layer_kernel_8cpp_source.xhtml

 /*
  * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "src/core/NEON/kernels/NENormalizationLayerKernel.h"

 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
 #include "src/core/CPP/Validate.h"
 #include "src/core/NEON/NEFixedPoint.h"
 #include "src/core/NEON/NEMath.h"
 #include "src/core/NEON/wrapper/wrapper.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/NormalizationHelpers.h"
 #include "src/core/helpers/WindowHelpers.h"

 namespace arm_compute
 {
 namespace
 {
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);

     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");

     // Checks performed when output is configured
     if(output->total_size() != 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
     }

     return Status{};
 }

 } // namespace

 NENormalizationLayerKernel::NENormalizationLayerKernel()
     : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), _norm_info(NormType::IN_MAP_1D)
 {
 }

 void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output);
     // Output tensor auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), *input->info());

     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));

     const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info);

     _input         = input;
     _input_squared = input_squared;
     _output        = output;
     _norm_info     = norm_info;

     switch(_input->info()->data_type())
     {
         case DataType::F32:
         {
             switch(norm_idx)
             {
                 case 0:
                 {
                     if(norm_info.type() == NormType::IN_MAP_2D)
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 0, true>;
                     }
                     else
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 0, false>;
                     }
                     break;
                 }
                 case 1:
                     if(norm_info.type() == NormType::IN_MAP_2D)
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 1, true>;
                     }
                     else
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 1, false>;
                     }
                     break;
                 case 2:
                     _func = &NENormalizationLayerKernel::normalize_float<float, 4, 2, false>;
                     break;
                 default:
                     break;
             }
             break;
         }
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         case DataType::F16:
         {
             switch(norm_idx)
             {
                 case 0:
                 {
                     if(norm_info.type() == NormType::IN_MAP_2D)
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 0, true>;
                     }
                     else
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 0, false>;
                     }
                     break;
                 }
                 case 1:
                     if(norm_info.type() == NormType::IN_MAP_2D)
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 1, true>;
                     }
                     else
                     {
                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 1, false>;
                     }
                     break;
                 case 2:
                     _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 2, false>;
                     break;
                 default:
                     break;
             }
             break;
         }
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
         default:
             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
     }

     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps());
     INEKernel::configure(win);
 }

 template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
 void NENormalizationLayerKernel::normalize_float(const Window &window)
 {
     /** SIMD vector tag type. */
     using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;

     Window win(window);
     win.set(Window::DimX, Window::Dimension(0, 1, 1));

     const auto window_start_x = static_cast<int>(window.x().start());
     const auto window_end_x   = static_cast<int>(window.x().end());
     const int  window_step_x  = S;

     Iterator input(_input, win);
     Iterator input_squared(_input_squared, win);
     Iterator output(_output, win);

     const int dim_y                      = _input->info()->data_layout() == DataLayout::NCHW ? 1 : 2;
     const int radius                     = _norm_info.norm_size() / 2;
     const int input_squared_stride_x     = _input_squared->info()->strides_in_bytes()[0];
     const int input_squared_stride_slice = _input_squared->info()->strides_in_bytes()[dim];
     const int input_squared_stride_row   = _input_squared->info()->strides_in_bytes()[dim_y];

     const int max_right  = _input->info()->dimension(dim) - 1;
     const int max_bottom = _input->info()->dimension(dim_y) - 1;

     const auto coeff_vec = wrapper::vdup_n(static_cast<T>(_norm_info.scale_coeff()), ExactTagType{});
     const auto beta_vec  = wrapper::vdup_n(static_cast<T>(_norm_info.beta()), ExactTagType{});
     const auto kappa_vec = wrapper::vdup_n(static_cast<T>(_norm_info.kappa()), ExactTagType{});

     auto sequential_normalization = [&](const int x, const Coordinates & id, const int current_row, const int first_row, const int last_row, const T * input_ptr, const uint8_t *input_squared_start_ptr,
                                         T * output_ptr)
     {
         const int current_slice = dim == 0 ? x : id[dim];
         const int first_slice   = std::max(current_slice - radius, 0);
         const int last_slice    = std::min(current_slice + radius, max_right);

         const uint8_t *const input_squared_x_ptr = input_squared_start_ptr + x * input_squared_stride_x;
         // Accumulate 2D In-Map values
         auto accu = static_cast<T>(0.f);
         for(int j = first_row; j <= last_row; ++j)
         {
             // Compute row displacement
             const uint8_t *const input_squared_ptr = input_squared_x_ptr + (j - current_row) * input_squared_stride_row;
             for(int i = first_slice; i <= last_slice; ++i)
             {
                 accu += *reinterpret_cast<const T *>(input_squared_ptr + (i - current_slice) * input_squared_stride_slice);
             }
         }

         // Normalize
         const auto normalized       = std::pow(accu * static_cast<T>(_norm_info.scale_coeff()) + static_cast<T>(_norm_info.kappa()), _norm_info.beta());
         const auto normalized_pixel = (*(input_ptr + x)) / normalized;
         *(output_ptr + x)           = normalized_pixel;
     };

     execute_window_loop(win, [&](const Coordinates & id)
     {
         const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
         auto       output_ptr = reinterpret_cast<T *>(output.ptr());

         // Get range to normalize
         const int current_row = do_2D_norm ? id[dim_y] : 0;
         const int first_row   = do_2D_norm ? std::max(current_row - radius, 0) : 0;
         const int last_row    = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;

         int x = window_start_x;
         // Compute serially starting elements for the case x dimension is width
         for(; x < radius && x < window_end_x && dim == 0; ++x)
         {
             sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), output_ptr);
         }

         // Compute vectorized
         for(; x <= window_end_x - window_step_x - radius; x += window_step_x)
         {
             const int current_slice = dim == 0 ? x : id[dim];
             const int first_slice   = std::max(current_slice - radius, 0);
             const int last_slice    = std::min(current_slice + radius, max_right);

             const uint8_t *const input_squared_x_ptr = input_squared.ptr() + x * input_squared_stride_x;
             // Accumulate 2D In-Map values
             auto accu = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
             for(int j = first_row; j <= last_row; ++j)
             {
                 // Compute row displacement
                 const uint8_t *const input_squared_ptr = input_squared_x_ptr + (j - current_row) * input_squared_stride_row;
                 for(int i = first_slice; i <= last_slice; ++i)
                 {
                     accu = wrapper::vadd(accu, wrapper::vloadq(reinterpret_cast<const T *>(input_squared_ptr + (i - current_slice) * input_squared_stride_slice)));
                 }
             }

             // Normalize
             const auto normalized       = wrapper::vpow(wrapper::vmla(kappa_vec, coeff_vec, accu), beta_vec);
             const auto normalized_pixel = wrapper::vmul(wrapper::vloadq(input_ptr + x), wrapper::vinv(normalized));
             wrapper::vstore(reinterpret_cast<T *>(output_ptr + x), normalized_pixel);
         }

         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
             sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), output_ptr);
         }
     },
     input, input_squared, output);
 }

 Status NENormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo norm_info)
 {
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));

     return Status{};
 }

 void NENormalizationLayerKernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
     ARM_COMPUTE_ERROR_ON(_func == nullptr);

     // Run function
     (this->*_func)(window);
 }
 } // namespace arm_compute
arm_compute::NENormalizationLayerKernel::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info)
Static function to check if given info will lead to a valid configuration of NENormalizationLayerKern...
Definition: NENormalizationLayerKernel.cpp:277

arm_compute::NormalizationLayerInfo::scale_coeff
float scale_coeff() const
Return the scaling factor of the normalization function.
Definition: Types.h:1639

arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:28

WindowHelpers.h

arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28

arm_compute::NormalizationLayerInfo::kappa
float kappa() const
Get the kappa value.
Definition: Types.h:1613

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
Definition: Validate.h:490

ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:108

arm_compute::ITensorInfo::dimension
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

arm_compute::NormalizationLayerInfo::norm_size
uint32_t norm_size() const
Get the normalization size.
Definition: Types.h:1598

arm_compute::NormalizationLayerInfo::type
NormType type() const
Get the normalization type.
Definition: Types.h:1593

arm_compute::wrapper::vloadq
uint8x16_t vloadq(const uint8_t *ptr)
Definition: load.h:58

ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204

arm_compute::wrapper::vadd
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:39

Window.h

arm_compute::Format::F32
1 channel, 1 F32 per channel

arm_compute::NormalizationLayerInfo
Normalization Layer Information class.
Definition: Types.h:1575

ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466

arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:40

arm_compute::wrapper::vinv
float32x2_t vinv(const float32x2_t &a)
Definition: inv.h:47

ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455

arm_compute::Window::Dimension
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77

arm_compute::wrapper::vpow
float32x4_t vpow(const float32x4_t &a, const float32x4_t &b)
Definition: pow.h:40

arm_compute::Status
Status class.
Definition: Error.h:52

TensorInfo.h

arm_compute::ITensor
Interface for CPU tensor.
Definition: ITensor.h:36

arm_compute::NENormalizationLayerKernel::NENormalizationLayerKernel
NENormalizationLayerKernel()
Default constructor.
Definition: NENormalizationLayerKernel.cpp:67

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: introduction.dox:24

arm_compute::Format::F16
1 channel, 1 F16 per channel

arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159

Utils.h

arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43

ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152

arm_compute::wrapper::traits::neon_vector
Create the appropriate SIMD vector given its type and size in terms of elements.
Definition: traits.h:48

arm_compute::NormType::IN_MAP_1D
Normalization applied within the same map in 1D region.

arm_compute::Steps
Class to describe a number of elements in each dimension.
Definition: Steps.h:40

NENormalizationLayerKernel.h

arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Definition: AutoConfiguration.h:42

Validate.h

arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.

arm_compute::NormalizationLayerInfo::beta
float beta() const
Get the beta value.
Definition: Types.h:1608

arm_compute::NENormalizationLayerKernel::run
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Definition: NENormalizationLayerKernel.cpp:284

Error.h

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915

arm_compute::DataLayout::NCHW
Num samples, channels, height, width.

arm_compute::test::validation::info
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)

arm_compute::wrapper::vmul
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
Definition: mul.h:39

AutoConfiguration.h

arm_compute::ThreadInfo
Information about executing thread and CPU.
Definition: CPPTypes.h:252

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:439

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788

arm_compute::NENormalizationLayerKernel::configure
void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info)
Set the input and output tensors.
Definition: NENormalizationLayerKernel.cpp:72

arm_compute::validate_arguments
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Definition: NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp:45

NEMath.h

Validate.h

arm_compute::wrapper::vstore
void vstore(uint8_t *ptr, uint8x8_t val)
Definition: store.h:39

NormalizationHelpers.h

ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244

ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157

Helpers.h

arm_compute::wrapper::vdup_n
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
Definition: dup_n.h:41

arm_compute::execute_window_loop
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77

wrapper.h
Includes all wrapper headers at once.

arm_compute::ITensorInfo::strides_in_bytes
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.

arm_compute::wrapper::vmla
uint8x8_t vmla(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
Definition: mla.h:46

arm_compute::Window::Dimension::end
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:99

arm_compute::Iterator
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46

NEFixedPoint.h

arm_compute::get_normalization_dimension_index
unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info)
Calculate the normalization dimension index for a given normalization type.
Definition: NormalizationHelpers.h:39

arm_compute::Window::Dimension::start
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:94

arm_compute::NormType
NormType
The normalization type used for the normalization layer.
Definition: Types.h:505

arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39

arm_compute::NormType::IN_MAP_2D
Normalization applied within the same map in 2D region.

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201

arm_compute::ITensorInfo::data_layout
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.

arm_compute::Window::x
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145