// Fragment of a function template over two types, T1 and T2. The signature, the
// braces and the declarations of `src`, `dst` and `intermediate_type` are not
// part of this excerpt.
// NOTE(review): presumably the plain (unweighted) accumulate reference
// implementation - confirm against the complete file.
37 template <
typename T1,
typename T2>
// Pre-populate dst through fill_tensor_uniform, with bounds 0 and the maximum
// value of T1, both converted to T2.
// NOTE(review): the meaning of the literal 1 is not visible here (possibly a
// seed offset) - confirm against fill_tensor_uniform's declaration.
42 library->fill_tensor_uniform(
dst, 1,
static_cast<T2
>(0),
static_cast<T2
>(std::numeric_limits<T1>::max()));
// OpenMP work-sharing request for the element loop below; each iteration reads
// src[i] and reads/writes dst[i] only.
46 #pragma omp parallel for
48 for(
int i = 0; i <
src.num_elements(); ++i)
// Both operands are converted to intermediate_type before the addition, so the
// sum is formed in that type rather than in T1 or T2.
50 intermediate_type val =
static_cast<intermediate_type
>(
src[i]) +
static_cast<intermediate_type
>(
dst[i]);
// Narrow the sum to the destination element type with saturate_cast.
51 dst[i] = saturate_cast<T2>(val);
// Fragment of the weighted accumulation function template (named
// accumulate_weighted in the error message below). The signature, the braces
// and the declarations of `src`, `dst`, `alpha` and `intermediate_type` are not
// part of this excerpt.
57 template <
typename T1,
typename T2>
// Reject a weight outside the closed interval [0, 1] via the library's error
// macro before any work is done.
60 ARM_COMPUTE_ERROR_ON_MSG(alpha < 0.f || alpha > 1.f,
"Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]");
// Pre-populate dst through fill_tensor_uniform, with bounds 0 and the maximum
// value of T1, both converted to T2.
// NOTE(review): the meaning of the literal 1 is not visible here (possibly a
// seed offset) - confirm against fill_tensor_uniform's declaration.
64 library->fill_tensor_uniform(
dst, 1,
static_cast<T2
>(0),
static_cast<T2
>(std::numeric_limits<T1>::max()));
// OpenMP work-sharing request for the element loop below; each iteration reads
// src[i] and reads/writes dst[i] only.
68 #pragma omp parallel for
70 for(
int i = 0; i <
src.num_elements(); ++i)
// Blend computed in double: val = (1 - alpha) * dst[i] + alpha * src[i].
// Each element is first converted to intermediate_type, then takes part in the
// double-precision multiplication.
72 double val = (1. -
static_cast<double>(alpha)) *
static_cast<intermediate_type
>(
dst[i]) +
static_cast<double>(alpha) *
static_cast<intermediate_type
>(
src[i]);
// Converted with a plain static_cast (no saturate_cast on this path); if T2 is
// an integer type the fractional part of val is discarded by the conversion.
73 dst[i] =
static_cast<T2
>(val);
// Fragment of a function template over two types, T1 and T2. The signature, the
// braces and the declarations of `src`, `dst`, `shift` and `intermediate_type`
// are not part of this excerpt.
// NOTE(review): presumably the squared accumulate reference implementation -
// confirm against the complete file.
79 template <
typename T1,
typename T2>
// Pre-populate dst through fill_tensor_uniform, with bounds 0 and the maximum
// value of T1, both converted to T2.
// NOTE(review): the meaning of the literal 1 is not visible here (possibly a
// seed offset) - confirm against fill_tensor_uniform's declaration.
86 library->fill_tensor_uniform(
dst, 1,
static_cast<T2
>(0),
static_cast<T2
>(std::numeric_limits<T1>::max()));
// Divisor 2^shift. The literal 1 is an int, so the shift itself is evaluated
// before the result is converted to intermediate_type.
// NOTE(review): no bound on `shift` is visible in this excerpt - confirm that
// the omitted lines or the caller keep it below the width of the shifted type.
89 intermediate_type denom = 1 << shift;
// OpenMP work-sharing request for the element loop below; each iteration reads
// src[i] and reads/writes dst[i] only.
91 #pragma omp parallel for
93 for(
int i = 0; i <
src.num_elements(); ++i)
// val = dst[i] + (src[i] * src[i]) / denom, with every operand converted to
// intermediate_type first. The multiplication is evaluated before the division
// (left-to-right), so the full square is divided by denom.
95 intermediate_type val =
static_cast<intermediate_type
>(
dst[i]) + (
static_cast<intermediate_type
>(
src[i]) *
static_cast<intermediate_type
>(
src[i]) / denom);
// Narrow the result to the destination element type with saturate_cast.
96 dst[i] = saturate_cast<T2>(val);