28 #if !defined(_WIN64) && !defined(__OpenBSD__)
// Primary template of the interleave/block transform implementation.
//
// Template parameters:
//   tIntBy     - number of y indices grouped into one output block.
//   BlockBy    - number of x indices grouped into one output block.
//   Transposed - names a transposed variant; no line visible here reads it
//                directly (see the NOTE at the two stores below).
//   TOutSize, TInSize, vlt - not referenced by any line visible in this
//                body; presumably they exist so that specialisations can be
//                keyed on element size / vector-length type - confirm.
//
// NOTE(review): this listing carries leading source-line numbers and has gaps
// (closing braces and some statements are not present), so the comments below
// describe only what the visible lines establish.
45 template <
unsigned int tIntBy,
unsigned int BlockBy,
bool Transposed,
size_t TOutSize,
size_t TInSize, VLType vlt>
46 struct TransformImpl {
// Converting overload: reads TIn elements from `in`, writes TOut elements to
// `out` through a post-incremented pointer, converting with static_cast<TOut>.
//
//   out      - destination; advanced by one element per store.
//   in       - source; indexed as in[a * stride + b], so `stride` is measured
//              in TIn elements.
//   y0, ymax - half-open y range; split into groups of IntBy.
//   x0, xmax - half-open x range; split into groups of BlockBy.
47 template <
typename TOut,
typename TIn>
48 static void Transform(TOut* out,
const TIn*
const in,
const int stride,
49 const int y0,
const int ymax,
const int x0,
const int xmax) {
// Local alias for the interleave factor.
54 const unsigned int IntBy = tIntBy;
// IntBy and BlockBy are unsigned, so the int spans (ymax - y0) and (xmax - x0)
// are converted to unsigned for '/' and '%'. This presumes ymax >= y0 and
// xmax >= x0: a negative span would wrap to a very large value instead of
// producing zero blocks.
// A non-zero remainder adds one extra, partially filled block.
56 const int n_whole_y_blocks = (ymax - y0) / IntBy;
57 const int y_remainders = (ymax - y0) % IntBy;
58 const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
60 const int n_whole_x_blocks = (xmax - x0) / BlockBy;
61 const int x_remainders = (xmax - x0) % BlockBy;
62 const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0);
// Outer loop: one iteration per group of IntBy y indices.
67 for (
int y_block=0 ; y_block < n_y_blocks; y_block++) {
// fill_rows: y indices in this block that lie inside [y0, ymax);
// blank_rows: how many are missing from a full IntBy-sized block.
68 int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders;
69 int blank_rows = IntBy - fill_rows;
71 int y_base = y0 + (y_block * IntBy);
// Inner loop: one iteration per group of BlockBy x indices.
74 for (
int x_block=0 ; x_block < n_x_blocks; x_block++) {
// Same split for x: fill_cols in range, blank_cols short of a full BlockBy.
75 int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders;
76 int blank_cols = BlockBy - fill_cols;
78 int x_base = x0 + (x_block * BlockBy);
// Emit the in-range part of this tile, one `row` at a time.
80 for (
int row = 0; row < fill_rows; row++) {
81 for (
int col = 0; col < fill_cols; col++) {
// NOTE(review): the two stores below differ only in which index is scaled by
// `stride` (x first, then y). Presumably they are the two arms of a branch on
// `Transposed` whose if/else lines are missing from this listing - confirm
// against the full file; as shown, both would execute for every element.
84 *out++ =
static_cast<TOut
>(in[(x_base + col) * stride + y_base + row]);
86 *out++ =
static_cast<TOut
>(in[(y_base + row) * stride + x_base + col]);
// Zero-fill the x positions of this row that fall beyond xmax.
90 for (
int col=0; col < blank_cols; col++) {
91 *out++ =
static_cast<TOut
>(0);
// fill_cols + blank_cols equals BlockBy, so `pads` is the element count of
// the blank_rows that fall beyond ymax in this tile.
95 TOut zeroval =
static_cast<TOut
>(0);
96 int pads = blank_rows * (fill_cols + blank_cols);
// NOTE(review): the body of this loop is not present in this listing;
// presumably it stores `zeroval` into the next `pads` output slots - confirm.
98 for (
int i=0; i<pads; i++) {
// Same-type overload: forwards unchanged to Transform<T, T>. The first range
// is named k0/kmax here and is passed in the y0/ymax position.
107 template <
typename T>
108 static void Transform(T* out,
const T*
const in,
const int stride,
109 const int k0,
const int kmax,
const int x0,
const int xmax) {
110 Transform<T, T>(out, in, stride, k0, kmax, x0, xmax);
// Free function template that dispatches to TransformImpl.
//
// IntBy, BlockBy, Transposed and vlt (default VLType::None) are given
// explicitly by the caller; TOut and TIn come last in the parameter list, so
// they can be deduced from the `out` / `in` pointer arguments.
//
// NOTE(review): the line carrying the return type and function name is missing
// from this listing; the explicit instantiations later in the file suggest it
// is `void Transform(` - confirm against the full file.
115 template <
unsigned int IntBy,
unsigned int BlockBy,
bool Transposed, VLType vlt=VLType::None,
typename TOut,
typename TIn>
117 TOut* out,
const TIn*
const in,
const int stride,
118 const int k0,
const int kmax,
const int x0,
const int xmax
// The implementation is chosen by element size: sizeof(TOut) and sizeof(TIn)
// fill TransformImpl's TOutSize / TInSize parameters. All runtime arguments
// are passed through unchanged.
121 TransformImpl<IntBy, BlockBy, Transposed,
sizeof(TOut),
sizeof(TIn), vlt>
::Transform(
122 out, in, stride, k0, kmax, x0, xmax
// Explicit instantiation of Transform with template arguments
// <8, 1, true, VLType::None>, writing float output from float input.
// The five int parameters are stride, k0, kmax, x0, xmax (in the order used by
// the Transform template declared above, assuming that is the one instantiated).
131 template void Transform<8, 1, true, VLType::None>(
float *,
const float *,
int,
int,
int,
int,
int);
132 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// Explicit instantiation of Transform with template arguments
// <8, 1, true, VLType::None>, writing float output from __fp16 input.
// Only compiled when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined
// (see the enclosing #if).
133 template void Transform<8, 1, true, VLType::None>(
float *,
const __fp16 *,
int,
int,
int,
int,
int);
134 #endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
135 #ifdef ARM_COMPUTE_ENABLE_BF16
// Explicit instantiation of Transform with template arguments
// <8, 1, true, VLType::None>, writing float output from bfloat16 input.
// Only compiled when ARM_COMPUTE_ENABLE_BF16 is defined (see the enclosing
// #ifdef; its matching #endif is not visible in this listing).
136 template void Transform<8, 1, true, VLType::None>(
float *,
const bfloat16 *,
int,
int,
int,
int,
int);