39 const T *mtx_in =
reinterpret_cast<const T *
>(in.
data());
40 T *mtx_ref =
reinterpret_cast<T *
>(out.
data());
41 const int32_t in_rows = in.
shape().y();
42 const int32_t in_cols = in.
shape().x();
43 const int32_t out_stride = out.
shape().x();
45 for(; y <= (in_rows - 4); y += 4)
47 const T *in_ptr = &mtx_in[y * in_cols];
49 for(int32_t x = 0; x < in_cols; x++)
51 const T tmp[4] = { in_ptr[x + 0 * in_cols],
52 in_ptr[x + 1 * in_cols],
53 in_ptr[x + 2 * in_cols],
54 in_ptr[x + 3 * in_cols]
57 T *
dst = &mtx_ref[
static_cast<size_t>(x * 4.f) +
static_cast<size_t>(std::ceil(y / 4.f)) * out_stride];
58 memcpy(
dst, tmp,
sizeof(T) * 4);
63 const int32_t leftover_y = in_rows - y;
67 const T *in_ptr = &mtx_in[y * in_cols];
69 for(int32_t x = 0; x < in_cols; x++)
71 T tmp[4] = { 0, 0, 0, 0 };
73 for(int32_t k = 0; k < leftover_y; k++)
75 tmp[k] = in_ptr[k * in_cols + x];
77 T *
dst = &mtx_ref[
static_cast<size_t>(x * 4.f) +
static_cast<size_t>(std::ceil(y / 4.f)) * out_stride];
78 memcpy(
dst, tmp,
sizeof(T) * 4);