39 const int W = 16 /
sizeof(T);
40 const TensorShape shape_out(
static_cast<size_t>(in.
shape().y() * W),
static_cast<size_t>(std::ceil(in.
shape().x() /
static_cast<float>(W))));
42 const int32_t in_height = in.
shape().y();
43 const int32_t in_width = in.
shape().x();
44 const int32_t out_width = out.
shape().x();
45 const T *in_base_addr =
reinterpret_cast<const T *
>(in.
data());
46 T *out_base_addr =
reinterpret_cast<T *
>(out.
data());
48 for(; x < in_width; x += W)
50 for(
int y = 0; y < in_height; y++)
52 const T *in_addr = (in_base_addr + x + y * in_width);
53 T *out_addr = (out_base_addr + y * W + (x / W) * out_width);
55 for(
int k = 0; k < W; ++k)
58 if((x + k) >= in_width)
64 out_addr[k] = in_addr[k];