34 T* out,
const T*
const in,
const int stride,
35 const int x0,
const int xmax,
const int k0,
const int kmax
39 reinterpret_cast<uint16_t *
>(out),
40 reinterpret_cast<const uint16_t *
>(in),
41 stride*2, x0*2, xmax*2, k0, kmax
49 T* out,
const T*
const in,
const int stride,
50 const int x0,
const int xmax,
const int k0,
const int kmax
54 reinterpret_cast<uint16_t *
>(out),
55 reinterpret_cast<const uint16_t *
>(in),
56 stride, x0, xmax, k0, kmax
64 "VLD1.32 {d0-d3}, [%[in0]]!\n"
65 "VST1.32 {d0-d3}, [%[out]]\n"
70 :
"q0",
"q1",
"memory"
77 "VLD1.32 {d0-d3}, [%[in0]]!\n"
78 "VST1.32 {d0-d3}, [%[out]]!\n"
80 "VLD1.32 {d0-d3}, [%[in1]]!\n"
81 "VST1.32 {d0-d3}, [%[out]]\n"
83 "SUB %[out], %[out], #32\n"
88 :
"q0",
"q1",
"memory"
95 "VLD1.32 {d0-d3}, [%[in0]]!\n"
96 "VST1.32 {d0-d3}, [%[out]]!\n"
98 "VLD1.32 {d0-d3}, [%[in1]]!\n"
99 "VST1.32 {d0-d3}, [%[out]]!\n"
101 "VLD1.32 {d0-d3}, [%[in2]]!\n"
102 "VST1.32 {d0-d3}, [%[out]]!\n"
104 "VLD1.32 {d0-d3}, [%[in3]]!\n"
105 "VST1.32 {d0-d3}, [%[out]]\n"
107 "SUB %[out], %[out], #96\n"
114 :
"q0",
"q1",
"memory"
121 uint16_t* out,
const uint16_t*
const in,
const int stride,
122 const int x0,
const int xmax,
const int k0,
const int kmax