40 struct BroadcastUnroll
43 static void unroll(
const SimpleTensor<T> &src1,
const SimpleTensor<T> &src2, SimpleTensor<T> &
dst,
44 Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
46 const bool src1_is_broadcast = (src1.shape()[dim - 1] !=
dst.shape()[dim - 1]);
47 const bool src2_is_broadcast = (src2.shape()[dim - 1] !=
dst.shape()[dim - 1]);
49 id_src1.set(dim - 1, 0);
50 id_src2.set(dim - 1, 0);
51 id_dst.set(dim - 1, 0);
53 #pragma omp parallel for
55 for(
size_t i = 0; i <
dst.shape()[dim - 1]; ++i)
57 BroadcastUnroll < dim - 1 >::unroll(src1, src2,
dst, id_src1, id_src2, id_dst);
59 id_src1[dim - 1] += !src1_is_broadcast;
60 id_src2[dim - 1] += !src2_is_broadcast;
// Specialization for dim == 0. The primary template recurses through
// BroadcastUnroll<dim - 1>::unroll, so this specialization is where that chain stops.
// NOTE(review): the body of unroll() is not present in this excerpt; presumably it
// handles the single element addressed by the three coordinates - confirm against
// the full file.
67 struct BroadcastUnroll<0>
// Takes the same parameter list as the primary template's unroll(): two read-only
// source tensors, a writable destination tensor, and one mutable coordinate per tensor.
70 static void unroll(
const SimpleTensor<T> &src1,
const SimpleTensor<T> &src2, SimpleTensor<T> &
dst,
71 Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
87 BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2,
dst, id_src1, id_src2, id_dst);