30 #include "../asmlib.hpp"
33 void interleave_block<6, 1, VLType::None, false>(
34 float * &outptr,
const float *
const * in,
size_t width,
size_t height,
35 size_t row_offset,
bool
38 const float *inptr0 = in[0] + row_offset;
39 const float *inptr1 = in[1] + row_offset;
40 const float *inptr2 = in[2] + row_offset;
41 const float *inptr3 = in[3] + row_offset;
42 const float *inptr4 = in[4] + row_offset;
43 const float *inptr5 = in[5] + row_offset;
75 for (;width>7;width-=8) {
78 "VLD1.32 {d0-d3}, [%[inptr0]]!\n"
79 "VLD1.32 {d4-d7}, [%[inptr1]]!\n"
80 "VLD1.32 {d8-d11}, [%[inptr2]]!\n"
82 "VLD1.32 {d12-d15}, [%[inptr3]]!\n"
84 "VLD1.32 {d16-d19}, [%[inptr4]]!\n"
85 "VLD1.32 {d20-d23}, [%[inptr5]]!\n"
91 "VST1.32 {d0-d1}, [%[outptr]]!\n"
92 "VST1.32 {d16}, [%[outptr]]!\n"
97 "VST1.32 {d4-d5}, [%[outptr]]!\n"
100 "VST1.32 {d17}, [%[outptr]]!\n"
105 "VST1.32 {d8-d9}, [%[outptr]]!\n"
108 "VST1.32 {d20}, [%[outptr]]!\n"
112 "VST1.32 {d12-d13}, [%[outptr]]!\n"
114 "VST1.32 {d21}, [%[outptr]]!\n"
117 "VST1.32 {d2-d3}, [%[outptr]]!\n"
119 "VST1.32 {d18}, [%[outptr]]!\n"
122 "VST1.32 {d6-d7}, [%[outptr]]!\n"
124 "VST1.32 {d19}, [%[outptr]]!\n"
127 "VST1.32 {d10-d11}, [%[outptr]]!\n"
128 "VST1.32 {d22}, [%[outptr]]!\n"
131 "VST1.32 {d14-d15}, [%[outptr]]!\n"
132 "VST1.32 {d23}, [%[outptr]]!\n"
134 : [inptr0]
"+r" (inptr0), [inptr1]
"+r" (inptr1), [inptr2]
"+r" (inptr2), [inptr3]
"+r" (inptr3),
135 [inptr4]
"+r" (inptr4), [inptr5]
"+r" (inptr5), [outptr]
"+r" (outptr)
137 :
"q0",
"q1",
"q2",
"q3",
"q4",
"q5",
"q6",
"q7",
"q8",
"q9",
"q10",
"q11",
"q12",
"memory"
141 for (;width>0;width--) {
142 *outptr++ = *inptr0++;
143 *outptr++ = *inptr1++;
144 *outptr++ = *inptr2++;
145 *outptr++ = *inptr3++;
146 *outptr++ = *inptr4++;
147 *outptr++ = *inptr5++;