24 #ifndef ARM_COMPUTE_ACL_HPP_ 25 #define ARM_COMPUTE_ACL_HPP_ 34 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) 39 #define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x) 75 #define OBJECT_DELETER(obj, func) \ 77 struct ObjectDeleter<obj> \ 80 static inline AclStatus Destroy(obj v) \ 103 template <
typename E,
typename SE>
118 template <
typename SE,
typename E>
121 return static_cast<SE
>(val);
130 template <
typename T>
158 return _object.get();
166 return _object.get();
174 std::shared_ptr<T> _object{
nullptr };
186 template <
typename T>
189 return lhs.
get() == rhs.
get();
201 template <
typename T>
204 return !(lhs == rhs);
208 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) 216 class Status :
public std::exception
224 Status(
StatusCode status,
const std::string &msg)
225 : _status(status), _msg(msg)
232 const char *what()
const noexcept
override 267 static inline void report_status(
StatusCode status,
const std::string &msg)
271 throw Status(status, msg);
285 static inline void report_status(
StatusCode status,
const std::string &msg)
323 static constexpr int32_t num_threads_auto = -1;
350 bool enable_fast_math,
351 const char *kernel_config,
352 int32_t max_compute_units,
355 copts.mode = detail::as_cenum<AclExecutionMode>(
mode);
356 copts.capabilities = caps;
357 copts.enable_fast_math = enable_fast_math;
358 copts.kernel_config_file = kernel_config;
359 copts.max_compute_units = max_compute_units;
388 const auto st = detail::as_enum<StatusCode>(
AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.
copts));
390 report_status(st,
"[Compute Library] Failed to create context");
435 : opts{ detail::as_cenum<AclTuningMode>(
mode), compute_units }
466 const auto st = detail::as_enum<StatusCode>(
AclCreateQueue(&queue, ctx.
get(), &options.opts));
468 report_status(st,
"[Compute Library] Failed to create queue!");
480 return detail::as_enum<StatusCode>(
AclQueueFinish(_object.get()));
512 : _shape(shape), _data_type(data_type)
514 _cdesc.ndims = _shape.size();
515 _cdesc.shape = _shape.data();
516 _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
517 _cdesc.strides =
nullptr;
527 _data_type = detail::as_enum<DataType>(desc.
data_type);
528 _shape.reserve(desc.
ndims);
529 for(int32_t d = 0; d < desc.
ndims; ++d)
531 _shape.emplace_back(desc.
shape[d]);
552 is_same &= _data_type == other._data_type;
553 is_same &= _shape.size() == other._shape.size();
557 for(uint32_t d = 0; d < _shape.size(); ++d)
559 is_same &= _shape[d] == other._shape[d];
567 std::vector<int32_t> _shape{};
596 :
Tensor(ctx, desc, true, status)
609 const auto st = detail::as_enum<StatusCode>(
AclCreateTensor(&tensor, ctx.
get(), desc.
get(), allocate));
611 report_status(st,
"[Compute Library] Failed to create tensor!");
623 void *handle =
nullptr;
624 const auto st = detail::as_enum<StatusCode>(
AclMapTensor(_object.get(), &handle));
625 report_status(st,
"[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
636 const auto st = detail::as_enum<StatusCode>(
AclUnmapTensor(_object.get(), handle));
637 report_status(st,
"[Compute Library] Failed to unmap the tensor!");
649 const auto st = detail::as_enum<StatusCode>(
AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(
type)));
650 report_status(st,
"[Compute Library] Failed to import external memory to tensor!");
662 const auto st = detail::as_enum<StatusCode>(
AclGetTensorSize(_object.get(), &size));
663 report_status(st,
"[Compute Library] Failed to get the size of the tensor");
674 report_status(st,
"[Compute Library] Failed to get the descriptor of the tensor");
696 : tensor(tensor_), slot_id(slot_id_)
715 report_status(st,
"[Compute Library] Failure during tensor pack creation");
730 return detail::as_enum<StatusCode>(
AclPackTensor(_object.get(), tensor.
get(), slot_id));
740 const size_t size = packed.size();
741 std::vector<int32_t> slots(size);
742 std::vector<AclTensor> tensors(size);
744 for(
auto &p : packed)
746 slots[i] = p.slot_id;
750 return detail::as_enum<StatusCode>(
AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
788 report_status(st,
"[Compute Library] Failure during Activation operator creation");
796 #undef ARM_COMPUTE_IGNORE_UNUSED
struct AclTensor_ * AclTensor
Opaque Tensor pack object.
Options(ExecutionMode mode, AclTargetCapabilities caps, bool enable_fast_math, const char *kernel_config, int32_t max_compute_units, AclAllocator *allocator)
Constructor.
AclStatus AclMapTensor(AclTensor tensor, void **handle)
Map a tensor's backing memory to the host.
AclStatus AclDestroyQueue(AclQueue queue)
Destroy a given queue object.
AclStatus AclUnmapTensor(AclTensor tensor, void *handle)
Unmap the tensor's backing memory.
Call failed during execution.
Prioritize performance when a single iteration is expected to be performed.
Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status=nullptr)
AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx)
Creates a tensor pack.
AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors)
Eager execution of a given operator on a list of inputs and outputs.
AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options)
Create an operator queue.
AclStatus AclQueueFinish(AclQueue queue)
Wait until all elements on the queue have been completed.
Normal tuning mode, gives a good balance between tuning time and performance.
Call failed as invalid argument was passed.
StatusCode add(Tensor &tensor, int32_t slot_id)
Add tensor to tensor pack.
Options(TuningMode mode, int32_t compute_units)
Constructor.
#define ARM_COMPUTE_IGNORE_UNUSED(x)
Automatic discovery of capabilities.
AclStatus AclDestroyOperator(AclOperator op)
Destroy a given operator object.
void reset(T *val)
Reset object value.
void * map()
Maps the backing memory of a given tensor that can be used by the host to access any contents...
TensorPack(Context &ctx, StatusCode *status=nullptr)
Constructor.
decltype(strategy::transforms) typedef type
Context(Target target, const Options &options, StatusCode *status=nullptr)
Constructor.
SimpleTensor< float > src
int32_t ndims
Number of dimensions.
StatusCode run(Queue &queue, TensorPack &pack)
Run an operator on a given input list.
constexpr SE as_enum(E val) noexcept
Convert plain old enumeration to a strongly typed enum.
ImportType
Import memory types.
uint64_t AclTargetCapabilities
Error codes returned by the public entry-points.
Call failed as invalid argument was passed.
PackPair(Tensor *tensor_, int32_t slot_id_)
Constructor.
Call failed as an invalid backend was requested.
Cpu target that uses SIMD extensions.
TensorDescriptor(const std::vector< int32_t > &shape, DataType data_type)
Constructor.
Options()
Default Constructor.
constexpr E as_cenum(SE v) noexcept
Convert a strongly typed enum to an old plain c enum.
AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc)
Get the descriptor of this tensor.
AclStatus AclDestroyTensorPack(AclTensorPack pack)
Destroy a given tensor pack object.
TensorDescriptor(const AclTensorDescriptor &desc)
Constructor.
StatusCode finish()
Block until all the tasks of the queue have been marked as finished.
StatusCode
Status code enum.
Call failed as configuration is unsupported.
StatusCode unmap(void *handle)
Unmaps tensor's memory.
bool operator==(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Equality operator for library object.
Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status=nullptr)
Constructor.
Call succeeded, leading to valid state for all involved objects/data.
Fast tuning mode, testing a small portion of the tuning space.
Cpu target that leverages SIMD.
Call failed as requested capability is not implemented.
Prefer minimum latency in consecutive runs, might introduce higher startup times. ...
bool operator!=(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Inequality operator for library object.
AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id)
Add a tensor to a tensor pack.
AclStatus AclDestroyTensor(AclTensor tensor)
Destroy a given tensor object.
input allocator() -> allocate()
Prefer minimizing startup time.
16-bit brain floating point
AclStatus AclActivation(AclOperator *op, AclContext ctx, const AclTensorDescriptor *src, const AclTensorDescriptor *dst, const AclActivationDescriptor info)
__cplusplus
AclDataType data_type
Tensor Data type.
AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate)
Create a Tensor object.
Gpu target that leverages OpenCL.
AclStatus AclDestroyContext(AclContext ctx)
Destroy a given context object.
AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size)
__cplusplus
AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors)
Add a list of tensors to a tensor pack.
uint64_t get_size()
Get the size of the tensor in bytes.
TensorDescriptor get_descriptor()
Get the descriptor of this tensor.
Call failed due to failure to allocate resources.
Queue(Context &ctx, StatusCode *status=nullptr)
Constructor.
Exhaustive tuning mode, increased tuning time but with best results.
Options()
Default Constructor.
const T * get() const
Access underlying object.
bool operator==(const TensorDescriptor &other)
Operator to compare two TensorDescriptor.
const AclTensorDescriptor * get() const
Get underlying C tensor descriptor.
Prioritize performance when multiple iterations are performed.
StatusCode add(std::initializer_list< PackPair > packed)
Add a list of tensors to a tensor pack.
AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type)
Import external memory to a given tensor object.
int32_t * shape
Tensor Shape.
Call failed as an object has invalid state.
Queue(Context &ctx, const Options &options=Options(), StatusCode *status=nullptr)
Constructor.
Object base class for library objects.
Queue class.
Data type enumeration.
Construct to handle destruction of objects.
Context(Target target, StatusCode *status=nullptr)
Constructor.
Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
Constructor.
Context class.
Available tuning modes.
#define OBJECT_DELETER(obj, func)
AclStatus AclCreateContext(AclContext *ctx, AclTarget target, const AclContextOptions *options)
__cplusplus