// Type trait: `value` is true when T, with const/volatile qualifiers removed, is one of
// float, half_float::half, double or long double, and false for every other type.
// NOTE(review): the `template <typename T>` header and the struct body are not visible
// in this excerpt.
37 struct is_floating_point
38 : std::integral_constant < bool,
39 std::is_same<float, typename std::remove_cv<T>::type>::value || std::is_same<half_float::half, typename std::remove_cv<T>::type>::value
40 || std::is_same<double, typename std::remove_cv<T>::type>::value || std::is_same<long double, typename std::remove_cv<T>::type>::value >
// Scale value of one. The int64-based multiply code in this file compares `scale`
// against this constant with a 0.00001f tolerance to detect the "scale == 1" case.
46 constexpr
float scale1_constant = 1.f;
// Fragment of a template parameterised on three types (T1, T2, T3).
// NOTE(review): the function signature, braces, the switch cases and the return
// statements are not visible in this excerpt; the comments below describe only the
// statements that are.
57 template <
typename T1,
typename T2,
typename T3>
// Product of both operands (each converted to intermediate_type) and the scale
// (converted to double), held as a double.
62 const double val =
static_cast<intermediate_type
>(src1) *
static_cast<intermediate_type
>(src2) *
static_cast<double>(
scale);
// When T3 is a floating-point type according to is_floating_point, the product is
// converted to T3 directly, without rounding or saturation.
64 if(is_floating_point<T3>::value)
66 const auto result =
static_cast<T3
>(val);
// rounded_val is assigned inside a switch on rounding_policy (cases not visible here).
72 double rounded_val = 0;
73 switch(rounding_policy)
// With ConvertPolicy::SATURATE the rounded value goes through saturate_cast<T3>;
// with any other policy it is passed to static_cast<T3> unchanged.
88 const auto result =
static_cast<T3
>((convert_policy ==
ConvertPolicy::SATURATE) ? saturate_cast<T3>(rounded_val) : rounded_val);
// Fragment of a multiply routine whose visible return statements all produce int32_t.
// NOTE(review): the signature, braces, else-branches, the condition that selects
// between the saturating and wrapping returns, the rounding switch cases and the
// definitions of i32_lo are not visible in this excerpt.
//
// Both operands are widened to int64_t before multiplying.
97 const int64_t intermediate_val =
static_cast<int64_t
>(src1) *
static_cast<int64_t
>(src2);
// Treat the scale as one when it is within 0.00001 of scale1_constant.
99 if(std::abs(
scale - scale1_constant) < 0.00001f)
// Return path 1: the 64-bit product passed through saturate_cast<int32_t>.
105 return saturate_cast<int32_t>(intermediate_val);
// Return path 2: wrap the product into a 2^32-wide window.
110 const auto i32_hi =
static_cast<int64_t
>(std::numeric_limits<int32_t>::max());
// i32_wi == 2^32, the number of distinct int32_t values.
112 const auto i32_wi =
static_cast<int64_t
>(1) << 32;
// Subtract the whole multiples of 2^32 contained in the product.
// NOTE(review): static_cast<double> of a 64-bit integer is exact only up to 2^53 in
// magnitude on IEEE-754 doubles, so the quotient can be rounded for very large products.
// NOTE(review): support::cpp11::trunc presumably rounds toward zero like std::trunc; if
// so, a negative product leaves a remainder in (-2^32, 0], which always passes the
// `<= i32_hi` test below - including values below the int32_t minimum. Confirm that the
// direct static_cast is intended for those (the later wrap path uses std::floor instead).
113 int64_t wrapped_rounded_val = intermediate_val - i32_wi *
static_cast<int64_t
>(
support::cpp11::trunc(
static_cast<double>(intermediate_val) / i32_wi));
// Remainders up to INT32_MAX are returned as they are.
114 if(wrapped_rounded_val <= i32_hi)
116 return static_cast<int32_t
>(wrapped_rounded_val);
// Larger remainders are shifted by (i32_lo - i32_hi - 1).
// NOTE(review): i32_lo is presumably the int32_t minimum, making the shift -2^32 - confirm.
121 return static_cast<int32_t
>((wrapped_rounded_val - i32_hi) + i32_lo - 1);
// Scaled path. std::frexp decomposes scale into mantissa * 2^exponent with the mantissa
// in [0.5, 1); only the exponent is kept, the returned mantissa is discarded.
130 int scale_exponent = 0;
131 std::frexp(
scale, &scale_exponent);
// |exponent - 1| is used as the shift amount. For scale == 1 / 2^n this yields n.
// NOTE(review): this presumably relies on scale being an exact power of two not greater
// than one - verify against the callers.
134 scale_exponent = std::abs(scale_exponent - 1);
// scale_inv == 2^scale_exponent, stored as a double.
135 const double scale_inv =
static_cast<int64_t
>(1) << scale_exponent;
// Divide the 64-bit product by scale_inv in double precision.
136 const double val = intermediate_val / scale_inv;
// rounded_val is assigned inside a switch on rounding_policy (cases not visible here).
138 double rounded_val = 0;
139 switch(rounding_policy)
// Return path 3: the rounded value passed through saturate_cast<int32_t>.
156 return saturate_cast<int32_t>(rounded_val);
// Return path 4: wrap the rounded value, this time in double arithmetic.
161 const auto i32_hi =
static_cast<double>(std::numeric_limits<int32_t>::max());
163 const auto i32_wi =
static_cast<double>(
static_cast<int64_t
>(1) << 32);
// std::floor (rather than trunc) keeps the remainder non-negative for negative inputs.
164 double wrapped_rounded_val = rounded_val - i32_wi * std::floor(rounded_val / i32_wi);
// Remainders up to INT32_MAX are returned as they are.
165 if(wrapped_rounded_val <= i32_hi)
167 return static_cast<int32_t
>(wrapped_rounded_val);
// Larger remainders are shifted by (i32_lo - i32_hi - 1).
// NOTE(review): i32_lo is presumably the int32_t minimum as a double - confirm.
172 return static_cast<int32_t
>((wrapped_rounded_val - i32_hi) + i32_lo - 1);
// Recursive helper: each instantiation handles coordinate index `dim - 1` and delegates
// the remaining indices to BroadcastUnroll<dim - 1>.
// NOTE(review): the braces and the signature line that declares scale, convert_policy
// and rounding_policy are not visible in this excerpt.
178 template <
size_t dim>
179 struct BroadcastUnroll
181 template <
typename T1,
typename T2,
typename T3>
// src1/src2 are the inputs, dst the output; id_src1/id_src2/id_dst are the running
// coordinates, shared by reference across all recursion levels.
182 static void unroll(
const SimpleTensor<T1> &src1,
const SimpleTensor<T2> &src2, SimpleTensor<T3> &
dst,
184 Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
// An input counts as broadcast along this dimension when its extent differs from dst's.
186 const bool src1_is_broadcast = (src1.shape()[dim - 1] !=
dst.shape()[dim - 1]);
187 const bool src2_is_broadcast = (src2.shape()[dim - 1] !=
dst.shape()[dim - 1]);
// Restart this dimension at 0 for all three coordinate sets.
189 id_src1.set(dim - 1, 0);
190 id_src2.set(dim - 1, 0);
191 id_dst.set(dim - 1, 0);
// Walk dst's extent in this dimension; id_dst advances in the loop header.
193 for(
size_t i = 0; i <
dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
// Process all lower dimensions for the current coordinates.
195 BroadcastUnroll < dim - 1 >::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
// Adding the negated flag advances a source coordinate by 1 only when that input is not
// broadcast; a broadcast input stays at coordinate 0.
197 id_src1[dim - 1] += !src1_is_broadcast;
198 id_src2[dim - 1] += !src2_is_broadcast;
// Specialisation for dim == 0. BroadcastUnroll<dim>::unroll calls
// BroadcastUnroll<dim - 1>::unroll, so this is where that recursion ends.
// NOTE(review): the `template <>` line, the remaining parameters and the body of
// unroll() are not visible in this excerpt.
204 struct BroadcastUnroll<0>
206 template <
typename T1,
typename T2,
typename T3>
207 static void unroll(
const SimpleTensor<T1> &src1,
const SimpleTensor<T2> &src2, SimpleTensor<T3> &
dst,
209 Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
216 template <
typename T1,
typename T2,
typename T3>
233 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
249 dst = convert_to_asymmetric<uint8_t>(dst_tmp, qout);
261 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
277 dst = convert_to_symmetric<int16_t>(dst_tmp, qout);
287 Coordinates id_src2{};
288 Coordinates id_dst{};
289 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
305 dst = convert_to_asymmetric<int8_t>(dst_tmp, qout);
317 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
333 dst = convert_to_symmetric<int16_t>(dst_tmp, qout);
345 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst,
scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);