Compute Library
 21.08
AclTypes.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ACL_TYPES_H_
25 #define ARM_COMPUTE_ACL_TYPES_H_
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif /* __cplusplus */
33 
34 /** Opaque Context object */
35 typedef struct AclContext_ *AclContext;
36 /** Opaque Queue object */
37 typedef struct AclQueue_ *AclQueue;
38 /** Opaque Tensor object */
39 typedef struct AclTensor_ *AclTensor;
40 /** Opaque Tensor pack object */
41 typedef struct AclTensorPack_ *AclTensorPack;
42 /** Opaque Operator object */
43 typedef struct AclOperator_ *AclOperator;
44 
45 /** Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible) */
46 typedef uint64_t AclTargetCapabilities;
47 
48 /** Error codes returned by the public entry-points */
49 typedef enum AclStatus : int32_t /* NOTE(review): a fixed underlying type on an enum is C++11/C23 syntax; confirm C consumers accept it */
50 {
51  AclSuccess = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
52  AclRuntimeError = 1, /**< Call failed during execution */
53  AclOutOfMemory = 2, /**< Call failed due to failure to allocate resources */
54  AclUnimplemented = 3, /**< Call failed as requested capability is not implemented */
55  AclUnsupportedTarget = 4, /**< Call failed as an invalid backend was requested */
56  AclInvalidTarget = 5, /**< Call failed as an invalid target was passed */
57  AclInvalidArgument = 6, /**< Call failed as invalid argument was passed */
58  AclUnsupportedConfig = 7, /**< Call failed as configuration is unsupported */
59  AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
60 } AclStatus;
61 
62 /** Supported targets */
63 typedef enum AclTarget
64 {
65  AclCpu = 0, /**< Cpu target that uses SIMD extensions */
66  AclGpuOcl = 1, /**< OpenCL target for GPU */
67 } AclTarget;
68 
69 /** Execution mode types */
70 typedef enum AclExecutionMode
71 {
72  AclPreferFastRerun = 0, /**< Prioritize performance when multiple iterations are performed */
73  AclPreferFastStart = 1, /**< Prioritize performance when a single iteration is expected to be performed */
75 
76 /** Available CPU capabilities */
77 typedef enum AclCpuCapabilities
78 {
79  AclCpuCapabilitiesAuto = 0, /**< Automatic discovery of capabilities */
80 
81  AclCpuCapabilitiesNeon = (1 << 0), /**< Enable NEON optimized paths */
82  AclCpuCapabilitiesSve = (1 << 1), /**< Enable SVE optimized paths */
83  AclCpuCapabilitiesSve2 = (1 << 2), /**< Enable SVE2 optimized paths */
84  // Reserve 3, 4, 5, 6
85 
86  AclCpuCapabilitiesFp16 = (1 << 7), /**< Enable float16 data-type support */
87  AclCpuCapabilitiesBf16 = (1 << 8), /**< Enable bfloat16 data-type support */
88  // Reserve 9, 10, 11, 12
89 
90  AclCpuCapabilitiesDot = (1 << 13), /**< Enable paths that use the udot/sdot instructions */
91  AclCpuCapabilitiesMmlaInt8 = (1 << 14), /**< Enable paths that use the mmla integer instructions */
92  AclCpuCapabilitiesMmlaFp = (1 << 15), /**< Enable paths that use the mmla float instructions */
93 
94  AclCpuCapabilitiesAll = ~0 /**< Enable all paths */
96 
97 /** Allocator interface that can be passed to a context */
98 typedef struct AclAllocator
99 {
100  /** Allocate a block of size bytes of memory.
101  *
102  * @param[in] user_data User provided data that can be used by the allocator
103  * @param[in] size Size of the allocation
104  *
105  * @return A pointer to the allocated block if successful else NULL
106  */
107  void *(*alloc)(void *user_data, size_t size);
108  /** Release a block of memory.
109  *
110  * @param[in] user_data User provided data that can be used by the allocator
111  * @param[in] ptr Pointer to the block to release (presumably one returned by alloc -- confirm)
112  */
113  void (*free)(void *user_data, void *ptr);
114  /** Allocate a block of size bytes of memory with a requested alignment.
115  *
116  * @param[in] user_data User provided data that can be used by the allocator
117  * @param[in] size Size of the allocation
118  * @param[in] alignment Requested alignment of the allocation
119  * @return A pointer to the allocated block if successful else NULL
120  */
121  void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
122  /** Release a block of memory through the aligned-free callback.
123  *
124  * @param[in] user_data User provided data that can be used by the allocator
125  * @param[in] ptr Pointer to the block to release (presumably one returned by aligned_alloc -- confirm)
126  */
127  void (*aligned_free)(void *user_data, void *ptr);
128 
129  /** User provided information */
130  void *user_data;
131 } AclAllocator;
132 
133 /** Context options */
134 typedef struct AclContextOptions
135 {
136  AclExecutionMode mode; /**< Execution mode to use */
137  AclTargetCapabilities capabilities; /**< Target capabilities */
138  bool enable_fast_math; /**< Allow precision loss */
139  const char *kernel_config_file; /**< Kernel configuration file */
140  int32_t max_compute_units; /**< Max compute units that can be used by a queue created from the context.
141  If <=0 the system will use the hw concurrency instead */
142  AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
144 
145 /** Supported tuning modes */
146 typedef enum
147 {
148  AclTuningModeNone = 0, /**< No tuning */
149  AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
150  AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning time and performance */
151  AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
152 } AclTuningMode;
153 
154 /** Queue options */
155 typedef struct
156 {
157  AclTuningMode mode; /**< Tuning mode */
158  int32_t compute_units; /**< Compute Units that the queue will deploy */
160 
161 /** Supported data types */
162 typedef enum AclDataType
163 {
164  AclDataTypeUnknown = 0, /**< Unknown data type */
165  AclUInt8 = 1, /**< 8-bit unsigned integer */
166  AclInt8 = 2, /**< 8-bit signed integer */
167  AclUInt16 = 3, /**< 16-bit unsigned integer */
168  AclInt16 = 4, /**< 16-bit signed integer */
169  AclUint32 = 5, /**< 32-bit unsigned integer */
170  AclInt32 = 6, /**< 32-bit signed integer */
171  AclFloat16 = 7, /**< 16-bit floating point */
172  AclBFloat16 = 8, /**< 16-bit brain floating point */
173  AclFloat32 = 9, /**< 32-bit floating point */
174 } AclDataType;
175 
176 /** Supported data layouts for operations */
177 typedef enum AclDataLayout
178 {
179  AclDataLayoutUnknown = 0, /**< Unknown data layout */
180  AclNhwc = 1, /**< Native, performant, Compute Library data layout */
181  AclNchw = 2, /**< Data layout where width is the fastest changing dimension */
182 } AclDataLayout;
183 
184 /** Type of memory to be imported */
186 {
187  AclHostPtr = 0 /**< Host allocated memory */
189 
190 /** Tensor Descriptor */
191 typedef struct AclTensorDescriptor
192 {
193  int32_t ndims; /**< Number of dimensions */
194  int32_t *shape; /**< Tensor Shape */
195  AclDataType data_type; /**< Tensor Data type */
196  int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */
197  int64_t boffset; /**< Offset in terms of bytes for the first element */
199 
200 /** Slot type of a tensor */
201 typedef enum
202 {
204  AclSrc = 0,
205  AclSrc0 = 0, /**< Same value as AclSrc */
206  AclSrc1 = 1,
207  AclDst = 30,
208  AclSrcVec = 256,
209 } AclTensorSlot;
210 
211 #ifdef __cplusplus
212 }
213 #endif /* __cplusplus */
214 #endif /* ARM_COMPUTE_ACL_TYPES_H_ */
void * user_data
Definition: AclTypes.h:130
struct AclTensor_ * AclTensor
Opaque Tensor pack object.
Definition: AclTypes.h:39
32-bit floating point
Definition: AclTypes.h:173
struct AclTensorPack_ * AclTensorPack
Opaque Operator object.
Definition: AclTypes.h:41
32-bit signed integer
Definition: AclTypes.h:170
Enable bfloat16 data-type support.
Definition: AclTypes.h:87
Host allocated memory.
Definition: AclTypes.h:187
Call failed during execution.
Definition: AclTypes.h:52
Prioritize performance when a single iteration is expected to be performed.
Definition: AclTypes.h:73
OpenCL target for GPU.
Definition: AclTypes.h:66
AclDataType
Definition: AclTypes.h:162
No tuning.
Definition: AclTypes.h:148
int32_t compute_units
Compute Units that the queue will deploy.
Definition: AclTypes.h:158
AclTarget
Definition: AclTypes.h:63
Enable all paths.
Definition: AclTypes.h:94
int64_t * strides
Strides on each dimension.
Definition: AclTypes.h:196
Normal tuning mode, gives a good balance between tuning mode and performance.
Definition: AclTypes.h:150
void(* free)(void *user_data, void *ptr)
Release a block of size bytes of memory.
Definition: AclTypes.h:113
Call failed as invalid argument was passed.
Definition: AclTypes.h:56
Unknown data type.
Definition: AclTypes.h:164
Automatic discovery of capabilities.
Definition: AclTypes.h:79
Native, performant, Compute Library data layout.
Definition: AclTypes.h:180
AclDataLayout
Definition: AclTypes.h:177
Enable SVE optimized paths.
Definition: AclTypes.h:82
AclTuningMode
Definition: AclTypes.h:146
const char * kernel_config_file
Kernel configuration file.
Definition: AclTypes.h:139
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.
Definition: AclTypes.h:140
Enable paths that use the mmla integer instructions.
Definition: AclTypes.h:91
Enable NEON optimized paths.
Definition: AclTypes.h:81
int32_t ndims
Number of dimensions.
Definition: AclTypes.h:193
16-bit signed integer
Definition: AclTypes.h:168
AclExecutionMode mode
Execution mode to use.
Definition: AclTypes.h:136
AclTargetCapabilities capabilities
Target capabilities.
Definition: AclTypes.h:137
16-bit unsigned integer
Definition: AclTypes.h:167
uint64_t AclTargetCapabilities
Error codes returned by the public entry-points.
Definition: AclTypes.h:46
Call failed as invalid argument was passed.
Definition: AclTypes.h:57
Call failed as an invalid backend was requested.
Definition: AclTypes.h:55
Cpu target that uses SIMD extensions.
Definition: AclTypes.h:65
AclTuningMode mode
Tuning mode.
Definition: AclTypes.h:157
struct AclContextOptions AclContextOptions
Supported tuning modes.
Unknown data layout.
Definition: AclTypes.h:179
struct AclOperator_ * AclOperator
Definition: AclTypes.h:43
int64_t boffset
Offset in terms of bytes for the first element.
Definition: AclTypes.h:197
Call failed as configuration is unsupported.
Definition: AclTypes.h:58
Call succeeded, leading to valid state for all involved objects/data.
Definition: AclTypes.h:51
32-bit unsigned integer
Definition: AclTypes.h:169
struct AclQueue_ * AclQueue
Opaque Tensor object.
Definition: AclTypes.h:37
8-bit signed integer
Definition: AclTypes.h:166
Fast tuning mode, testing a small portion of the tuning space.
Definition: AclTypes.h:149
Call failed as requested capability is not implemented.
Definition: AclTypes.h:54
Enable SVE2 optimized paths.
Definition: AclTypes.h:83
Data layout where width is the fastest changing dimension.
Definition: AclTypes.h:181
void(* aligned_free)(void *user_data, void *ptr)
Release a block of memory through the aligned-free callback.
Definition: AclTypes.h:127
16-bit brain floating point
Definition: AclTypes.h:172
16-bit floating point
Definition: AclTypes.h:171
AclDataType data_type
Tensor Data type.
Definition: AclTypes.h:195
AclImportMemoryType
Type of memory to be imported.
Definition: AclTypes.h:185
struct AclContext_ * AclContext
Opaque Context object.
Definition: AclTypes.h:35
Call failed due to failure to allocate resources.
Definition: AclTypes.h:53
8-bit unsigned integer
Definition: AclTypes.h:165
bool enable_fast_math
Allow precision loss.
Definition: AclTypes.h:138
struct AclTensorDescriptor AclTensorDescriptor
Slot type of a tensor.
Enable paths that use the mmla float instructions.
Definition: AclTypes.h:92
AclCpuCapabilities
Available CPU capabilities.
Definition: AclTypes.h:77
Exhaustive tuning mode, increased tuning time but with best results.
Definition: AclTypes.h:151
Prioritize performance when multiple iterations are performed.
Definition: AclTypes.h:72
Enable float16 data-type support.
Definition: AclTypes.h:86
AclAllocator * allocator
Allocator to be used by all the memory internally.
Definition: AclTypes.h:142
Enable paths that use the udot/sdot instructions.
Definition: AclTypes.h:90
int32_t * shape
Tensor Shape.
Definition: AclTypes.h:194
Call failed as an object has invalid state.
Definition: AclTypes.h:59
struct AclAllocator AclAllocator
Context options.
AclTensorSlot
Definition: AclTypes.h:201
AclStatus
Definition: AclTypes.h:49
AclExecutionMode
Execution mode types.
Definition: AclTypes.h:70