26 #include "../asmlib.hpp"
// Template parameters of the enclosing definition (its declaration line is
// not visible in this excerpt):
//   IntBy - number of consecutive elements copied from each input pointer.
//   TIn   - element type read from the input pointers.
//   TOut  - element type written to the output (each value is static_cast).
28 template <
unsigned int IntBy,
typename TIn,
typename TOut>
// Copy loop of a helper whose signature is not visible here (presumably the
// single-input counterpart of moveblock_1x2 - TODO confirm). It converts
// IntBy elements read through in0 to TOut and stores them consecutively
// through out, post-incrementing both pointers.
32 for (
unsigned int i = 0; i < IntBy; i++) {
33 *out++ =
static_cast<TOut
>(*in0++);
// Copies IntBy elements from in0 followed by IntBy elements from in1 into
// out, converting each element with static_cast<TOut>.
//
// in0, in1 - passed as references to pointers, so each caller pointer is left
//            advanced by IntBy elements on return.
// out      - passed by value; the caller's output pointer is not modified.
//            2 * IntBy elements are written starting at out.
37 static inline void moveblock_1x2(
const TIn *&in0,
const TIn *&in1, TOut *out) {
// First IntBy output elements come from in0.
38 for (
unsigned int i = 0; i < IntBy; i++) {
39 *out++ =
static_cast<TOut
>(*in0++);
// Next IntBy output elements come from in1.
41 for (
unsigned int i = 0; i < IntBy; i++) {
42 *out++ =
static_cast<TOut
>(*in1++);
// Copies IntBy elements from each of in0, in1, in2 and in3, in that order,
// into out, converting each element with static_cast<TOut>.
//
// in0..in3 - passed as references to pointers, so each caller pointer is left
//            advanced by IntBy elements on return.
// out      - passed by value; the caller's output pointer is not modified.
//            4 * IntBy elements are written starting at out.
46 static inline void moveblock_1x4(
const TIn *&in0,
const TIn *&in1,
const TIn *&in2,
const TIn *&in3, TOut *out) {
// Output elements [0, IntBy) come from in0.
47 for (
unsigned int i = 0; i < IntBy; i++) {
48 *out++ =
static_cast<TOut
>(*in0++);
// Output elements [IntBy, 2 * IntBy) come from in1.
50 for (
unsigned int i = 0; i < IntBy; i++) {
51 *out++ =
static_cast<TOut
>(*in1++);
// Output elements [2 * IntBy, 3 * IntBy) come from in2.
53 for (
unsigned int i = 0; i < IntBy; i++) {
54 *out++ =
static_cast<TOut
>(*in2++);
// Output elements [3 * IntBy, 4 * IntBy) come from in3.
56 for (
unsigned int i = 0; i < IntBy; i++) {
57 *out++ =
static_cast<TOut
>(*in3++);
// Rearranges a 2-D region of `in` into blocks of IntBy elements, converting
// each element to TOut. Only part of this function is visible in this
// excerpt; notes marked NOTE(review) depend on lines that are not shown.
//
// out    - destination buffer. NOTE(review): the visible lines write through
//          `outarray`, whose declaration is not shown - presumably derived
//          from `out`; confirm.
// in     - source base pointer.
// stride - element distance between consecutive k-indexed rows of `in`.
// x0     - first index along the contiguous dimension.
// xmax   - end index along the contiguous dimension; (xmax - x0) / IntBy
//          full blocks are processed by the main loops and the remaining
//          (xmax - x0) % IntBy elements by the tail loop.
// k0     - first index along the strided dimension.
// kmax   - end index along the strided dimension; the tail loop iterates
//          (kmax - k0) times.
61 static void Transform(TOut *out,
const TIn *in,
const int stride,
const int x0,
const int xmax,
const int k0,
const int kmax) {
// Input row pitch in elements.
62 const auto ldin = stride;
65 const TIn *inarray = in;
66 TOut *outptr_base = outarray;
// Start of the region: row k0, offset x0 within the row.
67 const TIn *inptr_base = inarray + x0 + (k0 * ldin);
// Output elements occupied by one block of IntBy x-positions across all
// (kmax - k0) rows; used below as the output step between x blocks.
68 int ldout = (kmax - k0) * IntBy;
// Four input pointers, one per consecutive row, each ldin elements apart.
// NOTE(review): the enclosing loop header is not visible here.
72 TOut *outptr = outptr_base;
73 const TIn *inptr = inptr_base;
74 const TIn *inptr1 = inptr + ldin;
75 const TIn *inptr2 = inptr1 + ldin;
76 const TIn *inptr3 = inptr2 + ldin;
// Advance the base pointers past four rows: 4 * IntBy output elements and
// 4 * ldin input elements.
83 outptr_base += IntBy * 4;
84 inptr_base += ldin * 4;
// One iteration per full block of IntBy x-positions (loop body not visible).
86 for (
int x = (xmax-x0) / IntBy; x > 0 ; x--) {
// Same pointer setup with three row pointers. NOTE(review): presumably the
// path for the rows left over after the groups of four - the guarding
// condition and loop body are not visible; confirm.
93 TOut *outptr = outptr_base;
94 const TIn *inptr = inptr_base;
95 const TIn *inptr1 = inptr + ldin;
96 const TIn *inptr2 = inptr1 + ldin;
102 for (
int x = (xmax-x0) / IntBy; x > 0 ; x--) {
// Number of trailing x-positions that do not fill a whole block of IntBy.
// NOTE(review): IntBy is unsigned, so (xmax - x0) is converted to unsigned
// here and in the divisions above; this assumes xmax >= x0.
126 const unsigned int overflow = (xmax - x0) % IntBy;
// Input position of the first leftover element in row k0.
128 const TIn *inptr_base = inarray + (xmax - overflow) + (k0 * ldin);
// Output position just past all the full x blocks.
129 TOut *outptr = outarray + ((xmax - x0) / IntBy) * ldout;
// One iteration per row in [k0, kmax).
131 for (
int k=(kmax-k0); k>0; k--) {
132 const TIn *inptr = inptr_base;
// Produce IntBy values per row: the first `overflow` come from the input,
// the rest are zero padding. NOTE(review): the store of `val` and the
// advance of inptr_base to the next row are not visible in this excerpt.
135 for (
unsigned int x=0; x < IntBy; x++) {
136 TOut val = (x < overflow) ? static_cast<TOut>(*inptr++) :
static_cast<TOut
>(0);