Compute Library
 23.11
AclTypes.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ACL_TYPES_H_
25 #define ARM_COMPUTE_ACL_TYPES_H_
26 
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
29 
30 #ifdef __cplusplus
31 extern "C"
32 {
33 #endif /* __cplusplus */
34 
35  /** Opaque Context object */
36  typedef struct AclContext_ *AclContext;
37  /** Opaque Queue object */
38  typedef struct AclQueue_ *AclQueue;
39  /** Opaque Tensor object */
40  typedef struct AclTensor_ *AclTensor;
41  /** Opaque Tensor pack object */
42  typedef struct AclTensorPack_ *AclTensorPack;
43  /** Opaque Operator object */
44  typedef struct AclOperator_ *AclOperator;
45 
46  /** Capabilities bitfield (Note: if multiple are enabled, Compute Library will pick the best possible) */
47  typedef uint64_t AclTargetCapabilities;
48 
/** Error codes returned by the public entry-points.
 *
 * The fixed underlying type (`: int32_t`) is C++ syntax, so it is only
 * emitted when the header is compiled as C++; C translation units get a
 * plain enum with the same enumerator values.
 */
#ifdef __cplusplus
typedef enum AclStatus : int32_t
#else  /* __cplusplus */
typedef enum AclStatus
#endif /* __cplusplus */
{
    AclSuccess            = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
    AclRuntimeError       = 1, /**< Call failed during execution */
    AclOutOfMemory        = 2, /**< Call failed due to failure to allocate resources */
    AclUnimplemented      = 3, /**< Call failed as requested capability is not implemented */
    AclUnsupportedTarget  = 4, /**< Call failed as an invalid backend was requested */
    AclInvalidTarget      = 5, /**< Call failed as an invalid target was passed */
    AclInvalidArgument    = 6, /**< Call failed as invalid argument was passed */
    AclUnsupportedConfig  = 7, /**< Call failed as configuration is unsupported */
    AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
} AclStatus;
62 
63  /** Supported targets */
64  typedef enum AclTarget
65  {
66  AclCpu = 0, /**< Cpu target that uses SIMD extensions */
67  AclGpuOcl = 1, /**< OpenCL target for GPU */
68  } AclTarget;
69 
/** Execution mode types */
typedef enum AclExecutionMode
{
    AclPreferFastRerun = 0, /**< Prioritize performance when multiple iterations are performed */
    AclPreferFastStart = 1, /**< Prioritize performance when a single iteration is expected to be performed */
} AclExecutionMode;
76 
/** Available CPU capabilities.
 *
 * Each enumerator other than Auto and All is a single bit, so values can be
 * OR-ed together. Bits 3-6 and 9-12 are reserved.
 */
typedef enum AclCpuCapabilities
{
    AclCpuCapabilitiesAuto = 0, /**< Automatic discovery of capabilities */

    AclCpuCapabilitiesNeon = (1 << 0), /**< Enable NEON optimized paths */
    AclCpuCapabilitiesSve  = (1 << 1), /**< Enable SVE optimized paths */
    AclCpuCapabilitiesSve2 = (1 << 2), /**< Enable SVE2 optimized paths */
    // Reserve 3, 4, 5, 6

    AclCpuCapabilitiesFp16 = (1 << 7), /**< Enable float16 data-type support */
    AclCpuCapabilitiesBf16 = (1 << 8), /**< Enable bfloat16 data-type support */
    // Reserve 9, 10, 11, 12

    AclCpuCapabilitiesDot      = (1 << 13), /**< Enable paths that use the udot/sdot instructions */
    AclCpuCapabilitiesMmlaInt8 = (1 << 14), /**< Enable paths that use the mmla integer instructions */
    AclCpuCapabilitiesMmlaFp   = (1 << 15), /**< Enable paths that use the mmla float instructions */

    AclCpuCapabilitiesAll = ~0 /**< Enable all paths */
} AclCpuCapabilities;
97 
98  /** Allocator interface that can be passed to a context */
99  typedef struct AclAllocator
100  {
101  /** Allocate a block of size bytes of memory.
102  *
103  * @param[in] user_data User provided data that can be used by the allocator
104  * @param[in] size Size of the allocation
105  *
106  * @return A pointer to the allocated block if successful else NULL
107  */
108  void *(*alloc)(void *user_data, size_t size);
109  /** Release a block of memory.
110  *
111  * @param[in] user_data User provided data that can be used by the allocator
112  * @param[in] ptr Pointer to the block to release (presumably one returned by alloc - TODO confirm)
113  */
114  void (*free)(void *user_data, void *ptr);
115  /** Allocate a block of size bytes of memory with a requested alignment.
116  *
117  * @param[in] user_data User provided data that can be used by the allocator
118  * @param[in] size Size of the allocation
 * @param[in] alignment Requested alignment of the allocation (presumably in bytes - TODO confirm)
119  *
120  * @return A pointer to the allocated block if successful else NULL
121  */
122  void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
123  /** Release a block of aligned memory.
124  *
125  * @param[in] user_data User provided data that can be used by the allocator
126  * @param[in] ptr Pointer to the block to release (presumably one returned by aligned_alloc - TODO confirm)
127  */
128  void (*aligned_free)(void *user_data, void *ptr);
129 
130  /** User provided information */
131  void *user_data;
132  } AclAllocator;
133 
134  /**< Context options */
135  typedef struct AclContextOptions
136  {
137  AclExecutionMode mode; /**< Execution mode to use */
138  AclTargetCapabilities capabilities; /**< Target capabilities */
139  bool enable_fast_math; /**< Allow precision loss */
140  const char *kernel_config_file; /**< Kernel cofiguration file */
141  int32_t max_compute_units; /**< Max compute units that can be used by a queue created from the context.
142  If <=0 the system will use the hw concurency insted */
143  AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
145 
146  /** Supported tuning modes */
147  typedef enum
148  {
149  AclTuningModeNone = 0, /**< No tuning */
150  AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
151  AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning time and performance */
152  AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
153  } AclTuningMode;
154 
155  /** Queue options */
156  typedef struct
157  {
158  AclTuningMode mode; /**< Tuning mode */
159  int32_t compute_units; /**< Compute Units that the queue will deploy */
160  } AclQueueOptions;
161 
162  /** Supported data types */
163  typedef enum AclDataType
164  {
165  AclDataTypeUnknown = 0, /**< Unknown data type */
166  AclUInt8 = 1, /**< 8-bit unsigned integer */
167  AclInt8 = 2, /**< 8-bit signed integer */
168  AclUInt16 = 3, /**< 16-bit unsigned integer */
169  AclInt16 = 4, /**< 16-bit signed integer */
170  AclUint32 = 5, /**< 32-bit unsigned integer (note: spelled with a lower-case 'i', unlike AclUInt8/AclUInt16) */
171  AclInt32 = 6, /**< 32-bit signed integer */
172  AclFloat16 = 7, /**< 16-bit floating point */
173  AclBFloat16 = 8, /**< 16-bit brain floating point */
174  AclFloat32 = 9, /**< 32-bit floating point */
175  } AclDataType;
176 
177  /** Supported data layouts for operations */
178  typedef enum AclDataLayout
179  {
180  AclDataLayoutUnknown = 0, /**< Unknown data layout */
181  AclNhwc = 1, /**< Native, performant, Compute Library data layout */
182  AclNchw = 2, /**< Data layout where width is the fastest changing dimension */
183  } AclDataLayout;
184 
/** Type of memory to be imported */
typedef enum AclImportMemoryType
{
    AclHostPtr = 0 /**< Host allocated memory */
} AclImportMemoryType;
190 
191  /**< Tensor Descriptor */
192  typedef struct AclTensorDescriptor
193  {
194  int32_t ndims; /**< Number or dimensions */
195  int32_t *shape; /**< Tensor Shape */
196  AclDataType data_type; /**< Tensor Data type */
197  int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */
198  int64_t boffset; /**< Offset in terms of bytes for the first element */
200 
/** Slot type of a tensor.
 *
 * AclSrc and AclSrc0 share the value 0.
 *
 * NOTE(review): the AclSlotUnknown enumerator was missing from this listing
 * although the page index references it on the line just before AclSrc; the
 * value -1 is restored here as an assumption - confirm against the upstream
 * header.
 */
typedef enum
{
    AclSlotUnknown = -1,
    AclSrc         = 0,
    AclSrc0        = 0,
    AclSrc1        = 1,
    AclDst         = 30,
    AclSrcVec      = 256,
} AclTensorSlot;
211 
212 #ifdef __cplusplus
213 }
214 #endif /* __cplusplus */
215 #endif /* ARM_COMPUTE_ACL_TYPES_H_ */
AclUnsupportedTarget
@ AclUnsupportedTarget
Call failed as an invalid backend was requested.
Definition: AclTypes.h:56
AclTensorDescriptor::boffset
int64_t boffset
Offset in terms of bytes for the first element.
Definition: AclTypes.h:198
AclContextOptions
struct AclContextOptions AclContextOptions
Context options.
AclCpu
@ AclCpu
Cpu target that uses SIMD extensions.
Definition: AclTypes.h:66
AclAllocator
Definition: AclTypes.h:99
AclStatus
AclStatus
Definition: AclTypes.h:50
AclTensorDescriptor::strides
int64_t * strides
Strides on each dimension.
Definition: AclTypes.h:197
AclDataType
AclDataType
Definition: AclTypes.h:163
AclDataTypeUnknown
@ AclDataTypeUnknown
Unknown data type.
Definition: AclTypes.h:165
AclExhaustive
@ AclExhaustive
Exhaustive tuning mode, increased tuning time but with best results.
Definition: AclTypes.h:152
AclSrc1
@ AclSrc1
Definition: AclTypes.h:207
AclHostPtr
@ AclHostPtr
Host allocated memory.
Definition: AclTypes.h:188
AclUInt16
@ AclUInt16
16-bit unsigned integer
Definition: AclTypes.h:168
AclContextOptions::capabilities
AclTargetCapabilities capabilities
Target capabilities.
Definition: AclTypes.h:138
AclDataLayoutUnknown
@ AclDataLayoutUnknown
Unknown data layout.
Definition: AclTypes.h:180
AclCpuCapabilitiesMmlaFp
@ AclCpuCapabilitiesMmlaFp
Enable paths that use the mmla float instructions.
Definition: AclTypes.h:93
AclSrc
@ AclSrc
Definition: AclTypes.h:205
AclDst
@ AclDst
Definition: AclTypes.h:208
AclCpuCapabilitiesDot
@ AclCpuCapabilitiesDot
Enable paths that use the udot/sdot instructions.
Definition: AclTypes.h:91
AclSuccess
@ AclSuccess
Call succeeded, leading to valid state for all involved objects/data.
Definition: AclTypes.h:52
AclAllocator::aligned_free
void(* aligned_free)(void *user_data, void *ptr)
Release a block of aligned memory.
Definition: AclTypes.h:128
AclPreferFastStart
@ AclPreferFastStart
Prioritize performance when a single iteration is expected to be performed.
Definition: AclTypes.h:74
AclUnsupportedConfig
@ AclUnsupportedConfig
Call failed as configuration is unsupported.
Definition: AclTypes.h:59
AclCpuCapabilitiesSve2
@ AclCpuCapabilitiesSve2
Enable SVE2 optimized paths.
Definition: AclTypes.h:84
AclInvalidObjectState
@ AclInvalidObjectState
Call failed as an object has invalid state.
Definition: AclTypes.h:60
AclCpuCapabilitiesMmlaInt8
@ AclCpuCapabilitiesMmlaInt8
Enable paths that use the mmla integer instructions.
Definition: AclTypes.h:92
AclQueueOptions::mode
AclTuningMode mode
Tuning mode.
Definition: AclTypes.h:158
AclDataLayout
AclDataLayout
Definition: AclTypes.h:178
AclTuningMode
AclTuningMode
Definition: AclTypes.h:147
AclTensorSlot
AclTensorSlot
Definition: AclTypes.h:202
AclContextOptions::kernel_config_file
const char * kernel_config_file
Kernel configuration file.
Definition: AclTypes.h:140
AclRapid
@ AclRapid
Fast tuning mode, testing a small portion of the tuning space.
Definition: AclTypes.h:150
AclOperator_
Definition: IOperator.h:38
AclAllocator
struct AclAllocator AclAllocator
Allocator interface that can be passed to a context.
AclOutOfMemory
@ AclOutOfMemory
Call failed due to failure to allocate resources.
Definition: AclTypes.h:54
AclContextOptions
Definition: AclTypes.h:135
AclInt32
@ AclInt32
32-bit signed integer
Definition: AclTypes.h:171
AclContextOptions::mode
AclExecutionMode mode
Execution mode to use.
Definition: AclTypes.h:137
AclInvalidTarget
@ AclInvalidTarget
Call failed as an invalid target was passed.
Definition: AclTypes.h:57
AclFloat16
@ AclFloat16
16-bit floating point
Definition: AclTypes.h:172
AclTensorDescriptor::data_type
AclDataType data_type
Tensor Data type.
Definition: AclTypes.h:196
AclOperator
struct AclOperator_ * AclOperator
Definition: AclTypes.h:44
AclCpuCapabilitiesFp16
@ AclCpuCapabilitiesFp16
Enable float16 data-type support.
Definition: AclTypes.h:87
AclAllocator::user_data
void * user_data
Definition: AclTypes.h:131
AclUInt8
@ AclUInt8
8-bit unsigned integer
Definition: AclTypes.h:166
AclCpuCapabilitiesBf16
@ AclCpuCapabilitiesBf16
Enable bfloat16 data-type support.
Definition: AclTypes.h:88
AclPreferFastRerun
@ AclPreferFastRerun
Prioritize performance when multiple iterations are performed.
Definition: AclTypes.h:73
AclImportMemoryType
AclImportMemoryType
Type of memory to be imported.
Definition: AclTypes.h:186
AclCpuCapabilitiesAuto
@ AclCpuCapabilitiesAuto
Automatic discovery of capabilities.
Definition: AclTypes.h:80
AclNhwc
@ AclNhwc
Native, performant, Compute Library data layout.
Definition: AclTypes.h:181
AclContext
struct AclContext_ * AclContext
Opaque Context object.
Definition: AclTypes.h:36
AclTargetCapabilities
uint64_t AclTargetCapabilities
Capabilities bitfield (if multiple are enabled, Compute Library will pick the best possible).
Definition: AclTypes.h:47
AclFloat32
@ AclFloat32
32-bit floating point
Definition: AclTypes.h:174
AclRuntimeError
@ AclRuntimeError
Call failed during execution.
Definition: AclTypes.h:53
AclInt8
@ AclInt8
8-bit signed integer
Definition: AclTypes.h:167
AclBFloat16
@ AclBFloat16
16-bit brain floating point
Definition: AclTypes.h:173
AclExecutionMode
AclExecutionMode
Execution mode types.
Definition: AclTypes.h:71
AclTuningModeNone
@ AclTuningModeNone
No tuning.
Definition: AclTypes.h:149
AclTensor
struct AclTensor_ * AclTensor
Opaque Tensor object.
Definition: AclTypes.h:40
AclInvalidArgument
@ AclInvalidArgument
Call failed as invalid argument was passed.
Definition: AclTypes.h:58
AclTensorPack
struct AclTensorPack_ * AclTensorPack
Opaque Tensor pack object.
Definition: AclTypes.h:42
AclTensor_
Definition: ITensorV2.h:30
AclSrc0
@ AclSrc0
Definition: AclTypes.h:206
AclGpuOcl
@ AclGpuOcl
OpenCL target for GPU.
Definition: AclTypes.h:67
AclAllocator::free
void(* free)(void *user_data, void *ptr)
Release a block of size bytes of memory.
Definition: AclTypes.h:114
AclTensorDescriptor
Definition: AclTypes.h:192
AclTensorDescriptor::shape
int32_t * shape
Tensor Shape.
Definition: AclTypes.h:195
AclCpuCapabilitiesAll
@ AclCpuCapabilitiesAll
Enable all paths.
Definition: AclTypes.h:95
AclQueue_
Definition: IQueue.h:29
AclNormal
@ AclNormal
Normal tuning mode, gives a good balance between tuning time and performance.
Definition: AclTypes.h:151
AclTensorPack_
Definition: TensorPack.h:31
AclSlotUnknown
@ AclSlotUnknown
Definition: AclTypes.h:204
AclCpuCapabilitiesSve
@ AclCpuCapabilitiesSve
Enable SVE optimized paths.
Definition: AclTypes.h:83
AclTensorDescriptor::ndims
int32_t ndims
Number of dimensions.
Definition: AclTypes.h:194
AclCpuCapabilities
AclCpuCapabilities
Available CPU capabilities.
Definition: AclTypes.h:78
AclContextOptions::allocator
AclAllocator * allocator
Allocator to be used by all the memory internally.
Definition: AclTypes.h:143
AclContext_
Definition: IContext.h:34
AclUint32
@ AclUint32
32-bit unsigned integer
Definition: AclTypes.h:170
AclInt16
@ AclInt16
16-bit signed integer
Definition: AclTypes.h:169
AclQueueOptions
Definition: AclTypes.h:156
AclUnimplemented
@ AclUnimplemented
Call failed as requested capability is not implemented.
Definition: AclTypes.h:55
AclContextOptions::enable_fast_math
bool enable_fast_math
Allow precision loss.
Definition: AclTypes.h:139
AclQueue
struct AclQueue_ * AclQueue
Opaque Queue object.
Definition: AclTypes.h:38
AclTarget
AclTarget
Definition: AclTypes.h:64
AclTensorDescriptor
struct AclTensorDescriptor AclTensorDescriptor
Tensor Descriptor.
AclQueueOptions::compute_units
int32_t compute_units
Compute Units that the queue will deploy.
Definition: AclTypes.h:159
AclNchw
@ AclNchw
Data layout where width is the fastest changing dimension.
Definition: AclTypes.h:182
AclSrcVec
@ AclSrcVec
Definition: AclTypes.h:209
AclCpuCapabilitiesNeon
@ AclCpuCapabilitiesNeon
Enable NEON optimized paths.
Definition: AclTypes.h:82
AclContextOptions::max_compute_units
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.
Definition: AclTypes.h:141