23.08
|
Go to the documentation of this file.
24 #ifndef ARM_COMPUTE_ACL_HPP_
25 #define ARM_COMPUTE_ACL_HPP_
34 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
39 #define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x)
75 #define OBJECT_DELETER(obj, func) \
77 struct ObjectDeleter<obj> \
80 static inline AclStatus Destroy(obj v) \
103 template <
typename E,
typename SE>
118 template <
typename SE,
typename E>
121 return static_cast<SE
>(val);
130 template <
typename T>
158 return _object.get();
166 return _object.get();
174 std::shared_ptr<T> _object{
nullptr };
186 template <
typename T>
189 return lhs.
get() == rhs.
get();
201 template <
typename T>
204 return !(lhs == rhs);
208 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
216 class Status :
public std::exception
224 Status(
StatusCode status,
const std::string &msg)
225 : _status(status), _msg(msg)
232 const char *what() const noexcept
override
267 static inline void report_status(
StatusCode status,
const std::string &msg)
271 throw Status(status, msg);
285 static inline void report_status(
StatusCode status,
const std::string &msg)
350 bool enable_fast_math,
351 const char *kernel_config,
352 int32_t max_compute_units,
388 const auto st = detail::as_enum<StatusCode>(
AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.
copts));
390 report_status(st,
"[Compute Library] Failed to create context");
435 :
opts{ detail::as_cenum<AclTuningMode>(
mode), compute_units }
466 const auto st = detail::as_enum<StatusCode>(
AclCreateQueue(&queue, ctx.
get(), &options.opts));
468 report_status(st,
"[Compute Library] Failed to create queue!");
480 return detail::as_enum<StatusCode>(
AclQueueFinish(_object.get()));
514 _cdesc.
ndims = _shape.size();
515 _cdesc.
shape = _shape.data();
516 _cdesc.
data_type = detail::as_cenum<AclDataType>(_data_type);
527 _data_type = detail::as_enum<DataType>(desc.
data_type);
528 _shape.reserve(desc.
ndims);
529 for(int32_t d = 0; d < desc.
ndims; ++d)
531 _shape.emplace_back(desc.
shape[d]);
552 is_same &= _data_type == other._data_type;
553 is_same &= _shape.size() == other._shape.size();
557 for(uint32_t d = 0; d < _shape.size(); ++d)
559 is_same &= _shape[d] == other._shape[d];
567 std::vector<int32_t> _shape{};
596 :
Tensor(ctx, desc, true, status)
611 report_status(st,
"[Compute Library] Failed to create tensor!");
623 void *handle =
nullptr;
624 const auto st = detail::as_enum<StatusCode>(
AclMapTensor(_object.get(), &handle));
625 report_status(st,
"[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
636 const auto st = detail::as_enum<StatusCode>(
AclUnmapTensor(_object.get(), handle));
637 report_status(st,
"[Compute Library] Failed to unmap the tensor!");
649 const auto st = detail::as_enum<StatusCode>(
AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(
type)));
650 report_status(st,
"[Compute Library] Failed to import external memory to tensor!");
662 const auto st = detail::as_enum<StatusCode>(
AclGetTensorSize(_object.get(), &size));
663 report_status(st,
"[Compute Library] Failed to get the size of the tensor");
674 report_status(st,
"[Compute Library] Failed to get the descriptor of the tensor");
715 report_status(st,
"[Compute Library] Failure during tensor pack creation");
740 const size_t size = packed.size();
741 std::vector<int32_t> slots(size);
742 std::vector<AclTensor> tensors(size);
744 for(
auto &p : packed)
746 slots[i] = p.slot_id;
750 return detail::as_enum<StatusCode>(
AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
788 report_status(st,
"[Compute Library] Failure during Activation operator creation");
796 #undef ARM_COMPUTE_IGNORE_UNUSED
@ AclUnsupportedTarget
Call failed as an invalid backend was requested.
int64_t boffset
Offset in terms of bytes for the first element.
AclStatus AclActivation(AclOperator *op, AclContext ctx, const AclTensorDescriptor *src, const AclTensorDescriptor *dst, const AclActivationDescriptor info)
__cplusplus
@ GpuOcl
Gpu target that leverages OpenCL.
#define OBJECT_DELETER(obj, func)
AclStatus AclDestroyOperator(AclOperator op)
Destroy a given operator object.
@ AclCpu
Cpu target that uses SIMD extensions.
#define ARM_COMPUTE_IGNORE_UNUSED(x)
Options(ExecutionMode mode, AclTargetCapabilities caps, bool enable_fast_math, const char *kernel_config, int32_t max_compute_units, AclAllocator *allocator)
Constructor.
SimpleTensor< float > src
decltype(strategy::transforms) typedef type
Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
Constructor.
int64_t * strides
Strides on each dimension.
void * map()
Maps the backing memory of a given tensor that can be used by the host to access any contents.
AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size)
__cplusplus
@ AclDataTypeUnknown
Unknown data type.
AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc)
Get the descriptor of this tensor.
@ AclExhaustive
Exhaustive tuning mode, increased tuning time but with best results.
bool operator==(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Equality operator for library object.
@ AclHostPtr
Host allocated memory.
Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status=nullptr)
TensorDescriptor(const AclTensorDescriptor &desc)
Constructor.
@ AclUInt16
16-bit unsigned integer
AclTargetCapabilities capabilities
Target capabilities.
Queue(Context &ctx, const Options &options=Options(), StatusCode *status=nullptr)
Constructor.
AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors)
Eager execution of a given operator on a list of inputs and outputs.
PackPair(Tensor *tensor_, int32_t slot_id_)
Constructor.
@ AclSuccess
Call succeeded, leading to valid state for all involved objects/data.
TensorDescriptor(const std::vector< int32_t > &shape, DataType data_type)
Constructor.
@ AclPreferFastStart
Prioritize performance when a single iteration is expected to be performed.
@ AclUnsupportedConfig
Call failed as configuration is unsupported.
@ AclInvalidObjectState
Call failed as an object has invalid state.
bool operator!=(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Inequality operator for library object.
constexpr E as_cenum(SE v) noexcept
Convert a strongly typed enum to an old plain c enum.
@ FastRerun
Prefer minimum latency in consecutive runs, might introduce higher startup times.
AclStatus AclMapTensor(AclTensor tensor, void **handle)
Map a tensor's backing memory to the host.
AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx)
Creates a tensor pack.
const char * kernel_config_file
Kernel configuration file.
AclStatus AclDestroyQueue(AclQueue queue)
Destroy a given queue object.
StatusCode run(Queue &queue, TensorPack &pack)
Run an operator on a given input list.
@ AclRapid
Fast tuning mode, testing a small portion of the tuning space.
AclStatus AclDestroyContext(AclContext ctx)
Destroy a given context object.
AclStatus AclUnmapTensor(AclTensor tensor, void *handle)
Unmap the tensor's backing memory.
@ AclOutOfMemory
Call failed due to failure to allocate resources.
AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options)
Create an operator queue.
Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status=nullptr)
Constructor.
Options()
Default Constructor.
uint64_t get_size()
Get the size of the tensor in bytes.
ObjectBase< T > & operator=(const ObjectBase< T > &)=default
Copy assignment operator.
void reset(T *val)
Reset object value.
AclStatus AclDestroyTensor(AclTensor tensor)
Destroy a given tensor object.
@ AclInt32
32-bit signed integer
AclExecutionMode mode
Execution mode to use.
AclStatus AclQueueFinish(AclQueue queue)
Wait until all elements on the queue have been completed.
StatusCode add(std::initializer_list< PackPair > packed)
Add a list of tensors to a tensor pack.
@ AclInvalidTarget
Call failed as an invalid target was passed.
@ AclFloat16
16-bit floating point
AclDataType data_type
Tensor Data type.
AclStatus AclDestroyTensorPack(AclTensorPack pack)
Destroy a given tensor pack object.
@ AclUInt8
8-bit unsigned integer
AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors)
Add a list of tensors to a tensor pack.
@ AclPreferFastRerun
Prioritize performance when multiple iterations are performed.
@ AclCpuCapabilitiesAuto
Automatic discovery of capabilities.
int32_t slot_id
Slot id with respect to the operator.
Context(Target target, const Options &options, StatusCode *status=nullptr)
Constructor.
uint64_t AclTargetCapabilities
Error codes returned by the public entry-points.
@ AclFloat32
32-bit floating point
@ AclRuntimeError
Call failed during execution.
Construct to handle destruction of objects.
Options(TuningMode mode, int32_t compute_units)
Constructor.
CLTensor * tensor
Pointer to the auxiliary tensor.
StatusCode unmap(void *handle)
Unmaps tensor's memory.
@ AclInt8
8-bit signed integer
AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id)
Add a tensor to a tensor pack.
@ AclBFloat16
16-bit brain floating point
Queue(Context &ctx, StatusCode *status=nullptr)
Constructor.
@ AclTuningModeNone
No tuning.
struct AclTensor_ * AclTensor
Opaque Tensor object.
const T * get() const
Access underlying object.
@ AclInvalidArgument
Call failed as invalid argument was passed.
~ObjectBase()=default
Destructor.
@ AclGpuOcl
OpenCL target for GPU.
StatusCode add(Tensor &tensor, int32_t slot_id)
Add tensor to tensor pack.
ImportType
Import memory types.
StatusCode
Status code enum.
AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type)
Import external memory to a given tensor object.
const AclTensorDescriptor * get() const
Get underlying C tensor descriptor.
@ Cpu
Cpu target that leverages SIMD.
@ FastStart
Prefer minimizing startup time.
AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate)
Create a Tensor object.
StatusCode finish()
Block until all the tasks of the queue have been marked as finished.
bool operator==(const TensorDescriptor &other)
Operator to compare two TensorDescriptor.
static constexpr int32_t num_threads_auto
Allow runtime to specify number of threads.
AclStatus AclCreateContext(AclContext *ctx, AclTarget target, const AclContextOptions *options)
__cplusplus
int32_t * shape
Tensor Shape.
TensorPack(Context &ctx, StatusCode *status=nullptr)
Constructor.
Options()
Default Constructor.
@ AclNormal
Normal tuning mode, gives a good balance between tuning time and performance.
int32_t ndims
Number of dimensions.
TensorDescriptor get_descriptor()
Get the descriptor of this tensor.
Object base class for library objects.
AclAllocator * allocator
Allocator to be used by all the memory internally.
@ AclUint32
32-bit unsigned integer
@ AclInt16
16-bit signed integer
Tensor * tensor
Tensor object.
constexpr SE as_enum(E val) noexcept
Convert plain old enumeration to a strongly typed enum.
@ AclUnimplemented
Call failed as requested capability is not implemented.
bool enable_fast_math
Allow precision loss.
T * get()
Access underlying object.
Context(Target target, StatusCode *status=nullptr)
Constructor.
input allocator() -> allocate()
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.