24 #ifndef ARM_COMPUTE_ACL_HPP_ 25 #define ARM_COMPUTE_ACL_HPP_ 34 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) 39 #define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x) 75 #define OBJECT_DELETER(obj, func) \ 77 struct ObjectDeleter<obj> \ 80 static inline AclStatus Destroy(obj v) \ 102 template <
typename E,
typename SE>
117 template <
typename SE,
typename E>
120 return static_cast<SE>(val);
129 template <
typename T>
157 return _object.get();
165 return _object.get();
173 std::shared_ptr<T> _object{
nullptr };
185 template <
typename T>
188 return lhs.
get() == rhs.
get();
200 template <
typename T>
203 return !(lhs == rhs);
207 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) 215 class Status :
public std::exception
223 Status(
StatusCode status,
const std::string &msg)
224 : _status(status), _msg(msg)
231 const char *what() const noexcept
override 266 static inline void report_status(
StatusCode status,
const std::string &msg)
270 throw Status(status, msg);
284 static inline void report_status(
StatusCode status,
const std::string &msg)
349 bool enable_fast_math,
350 const char *kernel_config,
351 int32_t max_compute_units,
387 const auto st = detail::as_enum<StatusCode>(
AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.
copts));
389 report_status(st,
"[Compute Library] Failed to create context");
434 :
opts{ detail::as_cenum<AclTuningMode>(
mode), compute_units }
465 const auto st = detail::as_enum<StatusCode>(
AclCreateQueue(&queue, ctx.
get(), &options.opts));
467 report_status(st,
"[Compute Library] Failed to create queue!");
479 return detail::as_enum<StatusCode>(
AclQueueFinish(_object.get()));
513 _cdesc.
ndims = _shape.size();
514 _cdesc.
shape = _shape.data();
515 _cdesc.
data_type = detail::as_cenum<AclDataType>(_data_type);
526 _data_type = detail::as_enum<DataType>(desc.
data_type);
527 _shape.reserve(desc.
ndims);
528 for(int32_t d = 0; d < desc.
ndims; ++d)
530 _shape.emplace_back(desc.
shape[d]);
551 is_same &= _data_type == other._data_type;
552 is_same &= _shape.size() == other._shape.size();
556 for(uint32_t d = 0; d < _shape.size(); ++d)
558 is_same &= _shape[d] == other._shape[d];
566 std::vector<int32_t> _shape{};
595 :
Tensor(ctx, desc, true, status)
608 const auto st = detail::as_enum<StatusCode>(
AclCreateTensor(&tensor, ctx.
get(), desc.
get(), allocate));
610 report_status(st,
"[Compute Library] Failed to create tensor!");
622 void *handle =
nullptr;
623 const auto st = detail::as_enum<StatusCode>(
AclMapTensor(_object.get(), &handle));
624 report_status(st,
"[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
635 const auto st = detail::as_enum<StatusCode>(
AclUnmapTensor(_object.get(), handle));
636 report_status(st,
"[Compute Library] Failed to unmap the tensor!");
648 const auto st = detail::as_enum<StatusCode>(
AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(
type)));
649 report_status(st,
"[Compute Library] Failed to import external memory to tensor!");
661 const auto st = detail::as_enum<StatusCode>(
AclGetTensorSize(_object.get(), &size));
662 report_status(st,
"[Compute Library] Failed to get the size of the tensor");
673 report_status(st,
"[Compute Library] Failed to get the descriptor of the tensor");
714 report_status(st,
"[Compute Library] Failure during tensor pack creation");
729 return detail::as_enum<StatusCode>(
AclPackTensor(_object.get(), tensor.
get(), slot_id));
739 const size_t size = packed.size();
740 std::vector<int32_t> slots(size);
741 std::vector<AclTensor> tensors(size);
743 for(
auto &p : packed)
745 slots[i] = p.slot_id;
749 return detail::as_enum<StatusCode>(
AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
753 #undef ARM_COMPUTE_IGNORE_UNUSED
struct AclTensor_ * AclTensor
Opaque Tensor pack object.
Options(ExecutionMode mode, AclTargetCapabilities caps, bool enable_fast_math, const char *kernel_config, int32_t max_compute_units, AclAllocator *allocator)
Constructor.
AclStatus AclMapTensor(AclTensor tensor, void **handle)
Map a tensor's backing memory to the host.
AclStatus AclDestroyQueue(AclQueue queue)
Destroy a given queue object.
AclStatus AclUnmapTensor(AclTensor tensor, void *handle)
Unmap the tensor's backing memory.
Call failed during execution.
Prioritize performance when a single iteration is expected to be performed.
AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx)
Creates a tensor pack.
AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options)
Create an operator queue.
Tensor * tensor
Tensor object.
AclStatus AclQueueFinish(AclQueue queue)
Wait until all elements on the queue have been completed.
int64_t * strides
Strides on each dimension.
Normal tuning mode, gives a good balance between tuning mode and performance.
Call failed as invalid argument was passed.
StatusCode add(Tensor &tensor, int32_t slot_id)
Add tensor to tensor pack.
Options(TuningMode mode, int32_t compute_units)
Constructor.
#define ARM_COMPUTE_IGNORE_UNUSED(x)
Automatic discovery of capabilities.
void reset(T *val)
Reset object value.
int32_t slot_id
Slot id in respect with the operator.
void * map()
Maps the backing memory of a given tensor that can be used by the host to access any contents.
const char * kernel_config_file
Kernel configuration file.
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.
TensorPack(Context &ctx, StatusCode *status=nullptr)
Constructor.
decltype(strategy::transforms) typedef type
Context(Target target, const Options &options, StatusCode *status=nullptr)
Constructor.
int32_t ndims
Number of dimensions.
constexpr SE as_enum(E val) noexcept
Convert plain old enumeration to a strongly typed enum.
ImportType
Import memory types.
AclExecutionMode mode
Execution mode to use.
AclTargetCapabilities capabilities
Target capabilities.
uint64_t AclTargetCapabilities
Error codes returned by the public entry-points.
Call failed as invalid argument was passed.
PackPair(Tensor *tensor_, int32_t slot_id_)
Constructor.
Call failed as an invalid backend was requested.
Cpu target that uses SIMD extensions.
TensorDescriptor(const std::vector< int32_t > &shape, DataType data_type)
Constructor.
Options()
Default Constructor.
constexpr E as_cenum(SE v) noexcept
Convert a strongly typed enum to an old plain c enum.
AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc)
Get the descriptor of this tensor.
AclStatus AclDestroyTensorPack(AclTensorPack pack)
Destroy a given tensor pack object.
TensorDescriptor(const AclTensorDescriptor &desc)
Constructor.
int64_t boffset
Offset in terms of bytes for the first element.
StatusCode finish()
Block until all the tasks of the queue have been marked as finished.
T * get()
Access underlying object.
StatusCode
Status code enum.
Call failed as configuration is unsupported.
StatusCode unmap(void *handle)
Unmaps tensor's memory.
bool operator==(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Equality operator for library object.
Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status=nullptr)
Constructor.
Call succeeded, leading to valid state for all involved objects/data.
Fast tuning mode, testing a small portion of the tuning space.
Cpu target that leverages SIMD.
Call failed as requested capability is not implemented.
Prefer minimum latency in consecutive runs, might introduce higher startup times.
bool operator!=(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Inequality operator for library object.
AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id)
Add a tensor to a tensor pack.
AclStatus AclDestroyTensor(AclTensor tensor)
Destroy a given tensor object.
input allocator() -> allocate()
Prefer minimizing startup time.
16-bit brain floating point
AclDataType data_type
Tensor Data type.
AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate)
Create a Tensor object.
Gpu target that leverages OpenCL.
~ObjectBase()=default
Destructor.
AclStatus AclDestroyContext(AclContext ctx)
Destroy a given context object.
AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size)
__cplusplus
AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors)
Add a list of tensors to a tensor pack.
uint64_t get_size()
Get the size of the tensor in byte.
TensorDescriptor get_descriptor()
Get the descriptor of this tensor.
Call failed due to failure to allocate resources.
static constexpr int32_t num_threads_auto
Allow runtime to specify number of threads.
bool enable_fast_math
Allow precision loss.
Queue(Context &ctx, StatusCode *status=nullptr)
Constructor.
Exhaustive tuning mode, increased tuning time but with best results.
Options()
Default Constructor.
const T * get() const
Access underlying object.
bool operator==(const TensorDescriptor &other)
Operator to compare two TensorDescriptor.
ObjectBase< T > & operator=(const ObjectBase< T > &)=default
Copy assignment operator.
const AclTensorDescriptor * get() const
Get underlying C tensor descriptor.
Prioritize performance when multiple iterations are performed.
StatusCode add(std::initializer_list< PackPair > packed)
Add a list of tensors to a tensor pack.
AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type)
Import external memory to a given tensor object.
AclAllocator * allocator
Allocator to be used by all the memory internally.
int32_t * shape
Tensor Shape.
Call failed as an object has invalid state.
Queue(Context &ctx, const Options &options=Options(), StatusCode *status=nullptr)
Constructor.
Object base class for library objects.
Queue class. Data type enumeration.
Construct to handle destruction of objects.
Context(Target target, StatusCode *status=nullptr)
Constructor.
Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
Constructor.
Context class. Available tuning modes.
#define OBJECT_DELETER(obj, func)
AclStatus AclCreateContext(AclContext *ctx, AclTarget target, const AclContextOptions *options)
__cplusplus