// Function-pointer type for one fp32 batch-normalization kernel
// instantiation; used as the mapped value of fused_map below so the
// caller can dispatch to the batch_normalization<T> specialisation
// matching a fused activation function.
// NOTE(review): "Nomalization" is a misspelling of "Normalization", but it
// is kept as-is because fused_map (and possibly other code outside this
// chunk) references the identifier by this exact name.
// NOTE(review): only the first and last parameters are visible in this
// chunk — presumably the full list mirrors batch_normalization's own
// signature (dst/mean/var/beta/gamma/epsilon/act_info); confirm against
// the full file.
40 using BatchNomalizationPtr = void (*)(ITensor *
src,
48 const Window &window);
// NEON fp32 batch-normalization kernel: for each element computes
//   res = beta + ((x - mean) / sqrt(var + epsilon)) * gamma
// then applies the activation functor (template-selected, see fused_map
// callers) before storing to dst. Vector main loop processes 4 floats per
// iteration; a scalar tail loop handles the remainder.
// NOTE(review): large parts of this function's body (signature tail, the
// execute_window_loop call, the vectorised arithmetic) are missing from
// this chunk; the comments below describe only what is visible here.
51 void batch_normalization(ITensor *
src,
// Tag type selecting the 128-bit NEON bit-vector traits for float
// (i.e. 4 x f32 lanes per vector register).
62 using ExactTagType =
typename wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
// 4 floats per vector iteration along X; [window_start_x, window_end_x)
// is the element range to process on that axis.
64 const int window_step_x = 4;
65 const auto window_start_x =
static_cast<int>(window.x().start());
66 const auto window_end_x =
static_cast<int>(window.x().end());
// Collapse higher dimensions (up to Z) where possible, then force the X
// dimension to a single step so the lambda below can run its own manual
// X loop over raw pointers.
68 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
69 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
72 Iterator output(
dst, win_collapsed);
// Raw pointers to the per-channel statistics. mean/var are mandatory;
// gamma/beta are optional (nullptr means "not provided" and the scalar
// tail below substitutes the identity values 1.f / 0.f).
74 const auto input_mean =
reinterpret_cast<const float *
>(mean->ptr_to_element(Coordinates(0, 0)));
75 const auto input_var =
reinterpret_cast<const float *
>(var->ptr_to_element(Coordinates(0, 0)));
76 const auto input_gamma =
77 (gamma !=
nullptr) ?
reinterpret_cast<const float *
>(gamma->ptr_to_element(Coordinates(0, 0))) :
nullptr;
78 const auto input_beta =
79 (beta !=
nullptr) ?
reinterpret_cast<const float *
>(beta->ptr_to_element(Coordinates(0, 0))) :
nullptr;
// Per-window-position body (presumably passed to execute_window_loop —
// the call itself is not visible in this chunk).
86 [&](
const Coordinates &)
88 const auto input_ptr =
reinterpret_cast<const float *
>(
input.ptr());
89 const auto output_ptr =
reinterpret_cast<float *
>(output.ptr());
// Vectorised main loop: 4 elements per step. The vector arithmetic
// (missing from this chunk) presumably mirrors the scalar tail below.
92 int x = window_start_x;
93 for (; x <= (window_end_x - window_step_x); x += window_step_x)
98 const auto gamma_vec = (input_gamma !=
nullptr)
101 const auto beta_vec = (input_beta !=
nullptr)
// Fused activation applied in-register before the (unseen) vector store.
116 activation_functor(res);
// Scalar tail: remaining 0-3 elements that do not fill a full vector.
124 for (; x < window_end_x; ++x)
// Missing gamma/beta degrade to the identity transform (scale 1, shift 0).
127 const float gamma = (input_gamma !=
nullptr) ? input_gamma[x] : 1.f;
128 const float beta = (input_beta !=
nullptr) ? input_beta[x] : 0.f;
// Standard batch-norm: normalise by sqrt(var + eps), then scale/shift.
// NOTE(review): unqualified sqrt() on a float promotes to double on some
// toolchains; std::sqrt / sqrtf would avoid that — confirm intent.
130 const float denominator = sqrt(input_var[x] +
epsilon);
131 const float numerator = input_ptr[x] - input_mean[x];
132 const float x_bar = numerator / denominator;
133 float res = beta + x_bar * gamma;
// Apply the fused activation to the scalar result as well.
138 activation_functor(res);
142 *
reinterpret_cast<float *
>(output_ptr + x) = res;
// Dispatch table: maps each activation function that can be fused into the
// batch-norm kernel to the matching batch_normalization<T> instantiation
// (RELU / bounded RELU / lower-upper bounded RELU, 4 f32 lanes each).
// NOTE(review): this is the interior of an enclosing dispatch function whose
// header and selection logic (original lines ~153-171) are not visible here.
149 static std::map<ActivationLayerInfo::ActivationFunction, BatchNomalizationPtr> fused_map = {
150 {ActivationLayerInfo::ActivationFunction::RELU, &batch_normalization<detail::relu<float, 4>>},
151 {ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, &batch_normalization<detail::brelu<float, 4>>},
152 {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, &batch_normalization<detail::lubrelu<float, 4>>}};
// Fallback path: run the kernel with detail::dummy (presumably a no-op
// activation) when no fused activation entry applies — confirm against the
// missing branch logic above this call.
172 batch_normalization<detail::dummy<float, 4>>(
src,
dst, mean, var, beta, gamma,
epsilon,
act_info, window);