43 const auto width_in =
static_cast<int>(
src.shape()[0]);
44 const auto height_in =
static_cast<int>(
src.shape()[1]);
45 const auto channel_in =
static_cast<int>(
src.shape()[2]);
46 const auto batch_in =
static_cast<int>(
src.shape()[3]);
47 const int r = channel_in / (block_shape * block_shape);
49 #pragma omp parallel for collapse(4)
51 for(
int b = 0;
b < batch_in; ++
b)
53 for(
int z = 0; z < channel_in; ++z)
55 for(
int y = 0; y < height_in; ++y)
57 for(
int x = 0; x < width_in; ++x)
59 const int out_x = (block_shape * x + (z / r) % block_shape);
60 const int out_y = (block_shape * y + (z / r) / block_shape);
62 const int in_pos = x + width_in * y + z * width_in * height_in +
b * width_in * height_in * channel_in;
63 result[out_pos] =
src[in_pos];