24.02.1
asmlib.hpp
Go to the documentation of this file.
1
/*
2
* Copyright (c) 2017-2018,2021 Arm Limited.
3
*
4
* SPDX-License-Identifier: MIT
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to
8
* deal in the Software without restriction, including without limitation the
9
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
* sell copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in all
14
* copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#pragma once
25
26
#ifdef __aarch64__
// Macros that expand to the assembler text for a preload (prefetch)
// instruction. They exist because various workarounds have been needed to get
// working preload behaviour.
//
// Code using these macros needs to clobber x20 and x21, as they might be used
// by the workaround.
//
// Each macro takes the address operand as a string literal and produces one
// newline-terminated instruction string, built by adjacent string-literal
// concatenation.

// "Correct" versions
#define ASM_PREFETCH(address)    "PRFM PLDL1KEEP, " address "\n"
#define ASM_PREFETCHU(address)   "PRFUM PLDL1KEEP, " address "\n"
#define ASM_PREFETCHL2(address)  "PRFM PLDL2KEEP, " address "\n"
#define ASM_PREFETCHW(address)   "PRFM PSTL1KEEP, " address "\n"
#define ASM_PREFETCHWL2(address) "PRFM PSTL2KEEP, " address "\n"

// To emit no preload at all, use this definition of ASM_PREFETCH instead:
//#define ASM_PREFETCH(address) ""
#else

// "Correct" versions for AArch32
#define ASM_PREFETCH(address)  "PLD " address "\n"
#define ASM_PREFETCHW(address) "PLDW " address "\n"

#endif
49
50
/*
51
* Prefetch helpers: prefetch_Nx(pfp) issues N ASM_PREFETCH instructions, at 64-byte steps starting from pfp.
52
*/
53
template
<
typename
T>
54
static
inline
void
prefetch_6x(
const
T *pfp) {
55
__asm __volatile (
56
ASM_PREFETCH
(
"[%[pfp]]"
)
57
ASM_PREFETCH
(
"[%[pfp], #64]"
)
58
ASM_PREFETCH
(
"[%[pfp], #128]"
)
59
ASM_PREFETCH
(
"[%[pfp], #192]"
)
60
ASM_PREFETCH
(
"[%[pfp], #256]"
)
61
ASM_PREFETCH
(
"[%[pfp], #320]"
)
62
:
63
: [pfp]
"r"
(pfp)
64
:
"memory"
65
);
66
}
67
68
template
<
typename
T>
69
static
inline
void
prefetch_5x(
const
T *pfp) {
70
__asm __volatile (
71
ASM_PREFETCH
(
"[%[pfp]]"
)
72
ASM_PREFETCH
(
"[%[pfp], #64]"
)
73
ASM_PREFETCH
(
"[%[pfp], #128]"
)
74
ASM_PREFETCH
(
"[%[pfp], #192]"
)
75
ASM_PREFETCH
(
"[%[pfp], #256]"
)
76
:
77
: [pfp]
"r"
(pfp)
78
:
"memory"
79
);
80
}
81
82
template
<
typename
T>
83
static
inline
void
prefetch_4x(
const
T *pfp) {
84
__asm __volatile (
85
ASM_PREFETCH
(
"[%[pfp]]"
)
86
ASM_PREFETCH
(
"[%[pfp], #64]"
)
87
ASM_PREFETCH
(
"[%[pfp], #128]"
)
88
ASM_PREFETCH
(
"[%[pfp], #192]"
)
89
:
90
: [pfp]
"r"
(pfp)
91
:
"memory"
92
);
93
}
94
95
template
<
typename
T>
96
static
inline
void
prefetch_3x(
const
T *pfp) {
97
__asm __volatile (
98
ASM_PREFETCH
(
"[%[pfp]]"
)
99
ASM_PREFETCH
(
"[%[pfp], #64]"
)
100
ASM_PREFETCH
(
"[%[pfp], #128]"
)
101
:
102
: [pfp]
"r"
(pfp)
103
:
"memory"
104
);
105
}
106
107
template
<
typename
T>
108
static
inline
void
prefetch_2x(
const
T *pfp) {
109
__asm __volatile (
110
ASM_PREFETCH
(
"[%[pfp]]"
)
111
ASM_PREFETCH
(
"[%[pfp], #64]"
)
112
:
113
: [pfp]
"r"
(pfp)
114
:
"memory"
115
);
116
}
117
118
template
<
typename
T>
119
static
inline
void
prefetch_1x(
const
T *pfp) {
120
__asm __volatile (
121
ASM_PREFETCH
(
"[%[pfp]]"
)
122
:
123
: [pfp]
"r"
(pfp)
124
:
"memory"
125
);
126
}
127
ASM_PREFETCH
#define ASM_PREFETCH(address)
Definition:
asmlib.hpp:45
src
core
NEON
kernels
arm_gemm
asmlib.hpp
Generated on Mon Mar 18 2024 11:31:58 for Compute Library by
1.8.17