42 const auto window_start_x =
static_cast<int>(window.
x().
start());
43 const auto window_end_x =
static_cast<int>(window.
x().
end());
52 const auto const_1 = svdup_n_f32(1.f);
53 const auto const_0 = svdup_n_f32(0.f);
54 const auto const_6 = svdup_n_f32(6.f);
55 const auto const_3 = svdup_n_f32(3.f);
56 const auto const_inv_6 = svdup_n_f32(0.166666667f);
57 const auto soft_relu_thresh = svdup_n_f32(16.63553047f);
59 const auto va = svdup_n_f32(
act_info.a());
60 const auto vb = svdup_n_f32(
act_info.b());
65 const auto input_ptr =
reinterpret_cast<const float *
>(
input.ptr());
66 const auto output_ptr =
reinterpret_cast<float *
>(output.
ptr());
71 int x = window_start_x;
72 svbool_t pg = svwhilelt_b32(x, window_end_x);
75 const auto vin = svld1_f32(pg, input_ptr + x);
78 case ActivationLayerInfo::ActivationFunction::ABS:
79 tmp = svabs_f32_z(pg, vin);
81 case ActivationLayerInfo::ActivationFunction::LINEAR:
82 tmp = svmla_f32_z(pg, vb, va, vin);
84 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
85 tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin))));
87 case ActivationLayerInfo::ActivationFunction::RELU:
88 tmp = svmax_f32_z(pg, const_0, vin);
90 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
91 tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin));
93 case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
94 tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin));
96 case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
97 tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va),
98 svmax_f32_z(pg, vin, const_0));
100 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
101 tmp = svsel_f32(svcmpgt_f32(pg, vin, soft_relu_thresh), vin,
102 svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin))));
104 case ActivationLayerInfo::ActivationFunction::ELU:
105 tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin,
106 svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1)));
108 case ActivationLayerInfo::ActivationFunction::SQRT:
109 tmp = svsqrt_f32_z(pg, vin);
111 case ActivationLayerInfo::ActivationFunction::SQUARE:
112 tmp = svmul_f32_z(pg, vin, vin);
114 case ActivationLayerInfo::ActivationFunction::TANH:
115 tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin)));
117 case ActivationLayerInfo::ActivationFunction::IDENTITY:
120 case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
125 svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3)))));
127 case ActivationLayerInfo::ActivationFunction::SWISH:
130 svinv_f32_z(pg, svadd_f32_z(pg, const_1,
131 svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin))))));
136 svst1_f32(pg, output_ptr + x, tmp);
139 pg = svwhilelt_b32(x, window_end_x);
141 }
while (svptest_any(svptrue_b32(), pg));