25 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
45 const auto window_start_x =
static_cast<int>(window.x().start());
46 const auto window_end_x =
static_cast<int>(window.x().end());
49 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
50 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
53 Iterator output(
dst, win_collapsed);
55 const auto const_1 = svdup_n_f16(1.f);
56 const auto const_0 = svdup_n_f16(0.f);
57 const auto const_6 = svdup_n_f16(6.f);
58 const auto const_3 = svdup_n_f16(3.f);
59 const auto const_inv_6 = svdup_n_f16(0.166666667f);
61 const auto va = svdup_n_f16(
act_info.a());
62 const auto vb = svdup_n_f16(
act_info.b());
65 [&](
const Coordinates &)
67 const auto input_ptr =
reinterpret_cast<const float16_t *
>(
input.ptr());
68 const auto output_ptr =
reinterpret_cast<float16_t *
>(output.ptr());
72 int x = window_start_x;
73 svbool_t pg = svwhilelt_b16(x, window_end_x);
76 const auto vin = svld1_f16(pg, input_ptr + x);
79 case ActivationLayerInfo::ActivationFunction::ABS:
80 tmp = svabs_f16_z(pg, vin);
82 case ActivationLayerInfo::ActivationFunction::LINEAR:
83 tmp = svmla_f16_z(pg, vb, va, vin);
85 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
86 tmp = svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, vin))));
88 case ActivationLayerInfo::ActivationFunction::RELU:
89 tmp = svmax_f16_z(pg, const_0, vin);
91 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
92 tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, const_0, vin));
94 case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
95 tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, vb, vin));
97 case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
98 tmp = svadd_f16_z(pg, svmul_f16_z(pg, svmin_f16_z(pg, vin, const_0), va),
99 svmax_f16_z(pg, vin, const_0));
101 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
102 tmp = svlog_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, vin)));
104 case ActivationLayerInfo::ActivationFunction::ELU:
105 tmp = svsel_f16(svcmpgt_f16(pg, vin, const_0), vin,
106 svmul_f16_z(pg, va, svsub_f16_z(pg, svexp_f16_z(pg, vin), const_1)));
108 case ActivationLayerInfo::ActivationFunction::SQRT:
109 tmp = svsqrt_f16_z(pg, vin);
111 case ActivationLayerInfo::ActivationFunction::SQUARE:
112 tmp = svmul_f16_z(pg, vin, vin);
114 case ActivationLayerInfo::ActivationFunction::TANH:
115 tmp = svmul_f16_z(pg, va, svtanh_f16_z(pg, svmul_f16_z(pg, vb, vin)));
117 case ActivationLayerInfo::ActivationFunction::IDENTITY:
120 case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
125 svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3)))));
127 case ActivationLayerInfo::ActivationFunction::SWISH:
130 svinv_f16_z(pg, svadd_f16_z(pg, const_1,
131 svexp_f16_z(pg, svneg_f16_z(pg, svmul_f16_z(pg, va, vin))))));
136 svst1_f16(pg, output_ptr + x, tmp);
139 pg = svwhilelt_b16(x, window_end_x);
141 }
while (svptest_any(svptrue_b16(), pg));
148 const ActivationLayerInfo &
act_info,
149 const Window &window)
152 const auto window_start_x = window.x().start();
153 const auto window_end_x = window.x().end();
154 const auto size = window_end_x - window_start_x;
155 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
156 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
159 Iterator output(
dst, win_collapsed);
162 [&](
const Coordinates &)
164 const auto input_ptr =
reinterpret_cast<const uint16_t *
>(
input.ptr());
165 auto output_ptr =
reinterpret_cast<uint16_t *
>(output.ptr());
166 lut_u16_sve(
reinterpret_cast<const uint16_t *
>(
act_info.lut_fp16().data()), 1U ,
167 size, input_ptr + window_start_x, output_ptr + window_start_x);