Compute Library
 23.08
Acl.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ACL_HPP_
25 #define ARM_COMPUTE_ACL_HPP_
26 
27 #include "arm_compute/Acl.h"
28 
29 #include <cstdlib>
30 #include <memory>
31 #include <string>
32 #include <vector>
33 
34 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
35 #include <exception>
36 #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
37 
38 // Helper Macros
39 #define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x)
40 
41 namespace acl
42 {
43 // Forward declarations
44 class Context;
45 class Queue;
46 class Tensor;
47 class TensorPack;
48 
49 /** Status code enum */
50 enum class StatusCode
51 {
61 };
62 
63 /** Utility namespace containing helper functions */
64 namespace detail
65 {
66 /** Construct to handle destruction of objects
67  *
68  * @tparam T Object base type
69  */
70 template <typename T>
72 {
73 };
74 
75 #define OBJECT_DELETER(obj, func) \
76  template <> \
77  struct ObjectDeleter<obj> \
78  \
79  { \
80  static inline AclStatus Destroy(obj v) \
81  { \
82  return func(v); \
83  } \
84  };
85 
91 
92 #undef OBJECT_DELETER
93 
94 /** Convert a strongly typed enum to a plain old C enum
95  *
96  * @tparam E Plain old C enum to convert to
97  * @tparam SE Strongly typed enum to convert from
98  *
99  * @param[in] v Value to convert
100  *
101  * @return The value of @p v, taken through its underlying integer type, as a plain old C enumeration
102  */
103 template <typename E, typename SE>
104 constexpr E as_cenum(SE v) noexcept
105 {
106  return static_cast<E>(static_cast<typename std::underlying_type<SE>::type>(v));
107 }
108 
109 /** Convert a plain old C enumeration to a strongly typed enum
110  *
111  * @tparam SE Strongly typed enum to convert to
112  * @tparam E Plain old C enum to convert from
113  *
114  * @param[in] val Value to convert
115  *
116  * @return @p val cast directly to the strongly typed enumeration
117  */
118 template <typename SE, typename E>
119 constexpr SE as_enum(E val) noexcept
120 {
121  return static_cast<SE>(val);
122 }
123 
124 /** Object base class for library objects
125  *
126  * Class is defining basic common interface for all the library objects
127  *
128  * @tparam T Object type to be templated on
129  */
130 template <typename T>
132 {
133 public:
134  /** Destructor */
135  ~ObjectBase() = default;
136  /** Copy constructor */
137  ObjectBase(const ObjectBase<T> &) = default;
138  /** Move Constructor */
139  ObjectBase(ObjectBase<T> &&) = default;
140  /** Copy assignment operator */
141  ObjectBase<T> &operator=(const ObjectBase<T> &) = default;
142  /** Move assignment operator */
143  ObjectBase<T> &operator=(ObjectBase<T> &&) = default;
144  /** Reset object value
145  *
146  * @param [in] val Value to set
147  */
148  void reset(T *val)
149  {
150  _object.reset(val, detail::ObjectDeleter<T *>::Destroy);
151  }
152  /** Access underlying object
153  *
154  * @return Underlying object
155  */
156  const T *get() const
157  {
158  return _object.get();
159  }
160  /** Access underlying object
161  *
162  * @return Underlying object
163  */
164  T *get()
165  {
166  return _object.get();
167  }
168 
169 protected:
170  /** Constructor */
171  ObjectBase() = default;
172 
173 protected:
174  std::shared_ptr<T> _object{ nullptr }; /**< Library object */
175 };
176 
177 /** Equality operator for library object
178  *
179  * @tparam T Parameter to template on
180  *
181  * @param[in] lhs Left hand-side argument
182  * @param[in] rhs Right hand-side argument
183  *
184  * @return True if both wrappers hold the same underlying object pointer, else false
185  */
186 template <typename T>
187 bool operator==(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs)
188 {
189  return lhs.get() == rhs.get();
190 }
191 
192 /** Inequality operator for library object
193  *
194  * @tparam T Parameter to template on
195  *
196  * @param[in] lhs Left hand-side argument
197  * @param[in] rhs Right hand-side argument
198  *
199  * @return True if objects are not equal, else false
200  */
201 template <typename T>
202 bool operator!=(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs)
203 {
204  return !(lhs == rhs);
205 }
206 } // namespace detail
207 
208 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
209 /** Status class
210  *
211  * Class is an extension of std::exception and contains the underlying
212  * status code and an explanatory error message to be reported.
213  *
214  * @note Class is visible only when ARM_COMPUTE_EXCEPTIONS_ENABLED is defined during compilation
215  */
216 class Status : public std::exception
217 {
218 public:
219  /** Constructor
220  *
221  * @param[in] status Status code to carry
222  * @param[in] msg Error message to be bound with the exception (copied into the object)
223  */
224  Status(StatusCode status, const std::string &msg)
225  : _status(status), _msg(msg)
226  {
227  }
228  /** Returns an explanatory exception message
229  *
230  * @return Null-terminated status message; the buffer belongs to this object
231  */
232  const char *what() const noexcept override
233  {
234  return _msg.c_str();
235  }
236  /** Underlying status accessor
237  *
238  * @return Status code passed at construction
239  */
240  StatusCode status() const
241  {
242  return _status;
243  }
244  /** Explicit status converter
245  *
246  * @return Status code passed at construction (same value as status())
247  */
248  explicit operator StatusCode() const
249  {
250  return _status;
251  }
252 
253 private:
254  StatusCode _status; /**< Status code */
255  std::string _msg; /**< Status message */
256 };
257 
258 /** Reports an error status and throws an exception object in case of failure
259  *
260  * @note This implementation is used when exceptions are enabled during compilation
261  *
262  * @param[in] status Status to report
263  * @param[in] msg Explanatory error message
264  *
265  * @throws Status carrying @p status and @p msg when @p status is not StatusCode::Success
266  */
267 static inline void report_status(StatusCode status, const std::string &msg)
268 {
269  if(status != StatusCode::Success)
270  {
271  throw Status(status, msg);
272  }
273 }
274 #else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
275 /** Reports a status code
276  *
277  * @note This implementation is used when exceptions are disabled during compilation
278  * @note Message is suppressed and not reported in this case
279  *
280  * @param[in] status Status to report
281  * @param[in] msg Explanatory error message
282  *
283  * @note Nothing is returned; the function has a void return type
284  */
285 static inline void report_status(StatusCode status, const std::string &msg)
286 {
289 }
290 #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
291 
292 /** Target enum: backends a Context can be created for */
293 enum class Target
294 {
295  Cpu = AclCpu, /**< Cpu target that leverages SIMD */
296  GpuOcl = AclGpuOcl /**< Gpu target that leverages OpenCL */
297 };
298 
299 /** Available execution modes */
300 enum class ExecutionMode
301 {
302  FastRerun = AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
303  FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */
304 };
305 
306 /** Context class
307  *
308  * Context acts as a central aggregate service for further objects created from it.
309  * It provides, internally, common facilities in order to avoid the use of global
310  * statically initialized objects that can lead to important side-effect under
311  * specific execution contexts.
312  *
313  * For example context contains allocators for object creation, for further backing memory allocation,
314  * any serialization interfaces and other modules that affect the construction of objects,
315  * like program caches for OpenCL.
316  */
317 class Context : public detail::ObjectBase<AclContext_>
318 {
319 public:
320  /** Context options */
321  struct Options
322  {
323  static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */
324 
325  /** Default Constructor
326  *
327  * @note By default no precision loss is enabled for operators
328  * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator
329  */
331  : Options(ExecutionMode::FastRerun /* mode */,
332  AclCpuCapabilitiesAuto /* caps */,
333  false /* enable_fast_math */,
334  nullptr /* kernel_config */,
335  num_threads_auto /* max_compute_units */,
336  nullptr /* allocator */)
337  {
338  }
339  /** Constructor
340  *
341  * @param[in] mode Execution mode to be used
342  * @param[in] caps Capabilities to be used
343  * @param[in] enable_fast_math Allow precision loss in favor of performance
344  * @param[in] kernel_config Kernel configuration file containing construction tuning meta-data
345  * @param[in] max_compute_units Max compute units that are expected to be used
346  * @param[in] allocator Allocator to be used for internal memory allocation
347  */
350  bool enable_fast_math,
351  const char *kernel_config,
352  int32_t max_compute_units,
354  {
355  copts.mode = detail::as_cenum<AclExecutionMode>(mode);
356  copts.capabilities = caps;
357  copts.enable_fast_math = enable_fast_math;
358  copts.kernel_config_file = kernel_config;
359  copts.max_compute_units = max_compute_units;
361  }
362 
364  };
365 
366 public:
367  /** Constructor
368  *
369  * @note Delegates to the (target, options, status) constructor
370  * @note As context options, a default-constructed Options object is used
371  *
372  * @param[in] target Target to create context for
373  * @param[out] status Optional; receives the creation status when not nullptr
374  */
375  explicit Context(Target target, StatusCode *status = nullptr)
376  : Context(target, Options(), status)
377  {
378  }
379  /** Constructor
380  *
381  * @param[in] target Target to create context for
382  * @param[in] options Context construction options
383  * @param[out] status Optional; receives the creation status when not nullptr (not written if report_status throws)
384  */
385  Context(Target target, const Options &options, StatusCode *status = nullptr)
386  {
387  AclContext ctx = nullptr; // never hand an indeterminate handle to reset() if creation fails without writing it
388  const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
389  reset(ctx);
390  report_status(st, "[Compute Library] Failed to create context"); // throws acl::Status on failure when exceptions are enabled
391  if(status)
392  {
393  *status = st;
394  }
395  }
396 };
397 
398 /** Available tuning modes */
399 enum class TuningMode
400 {
401  Rapid = AclRapid,
402  Normal = AclNormal,
404 };
405 
406 /** Queue class
407  *
408  * Queue is responsible for the execution related aspects, with main responsibilities those of
409  * scheduling and tuning operators.
410  *
411  * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
412  *
413  * @note An operator might depend on the maximum possible compute units that are provided in the context,
414  * thus a queue configured with more scheduling units than the context provides might lead to errors.
415  */
416 class Queue : public detail::ObjectBase<AclQueue_>
417 {
418 public:
419  /** Queue options */
420  struct Options
421  {
422  /** Default Constructor
423  *
424  * By default, no tuning is performed and the number of scheduling units
425  * depends on the internal device discovery functionality
426  */
428  : opts{ AclTuningModeNone, 0 } {};
429  /** Constructor
430  *
431  * @param[in] mode Tuning mode to be used
432  * @param[in] compute_units Number of scheduling units to be used
433  */
434  Options(TuningMode mode, int32_t compute_units)
435  : opts{ detail::as_cenum<AclTuningMode>(mode), compute_units }
436  {
437  }
438 
440  };
441 
442 public:
443  /** Constructor
444  *
445  * @note Delegates to the (ctx, options, status) constructor
446  * @note As queue options, a default-constructed Options object is used
447  *
448  * @param[in] ctx Context to create queue for
449  * @param[out] status Optional; receives the creation status when not nullptr
450  */
451  explicit Queue(Context &ctx, StatusCode *status = nullptr)
452  : Queue(ctx, Options(), status)
453  {
454  }
455  /** Constructor
456  *
457  * @note @p options has no default on purpose: a defaulted value would make Queue(ctx) ambiguous with the delegating constructor
458  *
459  * @param[in] ctx Context from where the queue will be created from
460  * @param[in] options Queue options to be used
461  * @param[out] status Optional; receives the creation status when not nullptr (not written if report_status throws)
462  */
463  explicit Queue(Context &ctx, const Options &options, StatusCode *status = nullptr)
464  {
465  AclQueue queue = nullptr; // never hand an indeterminate handle to reset() if creation fails without writing it
466  const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
467  reset(queue);
468  report_status(st, "[Compute Library] Failed to create queue!"); // throws acl::Status on failure when exceptions are enabled
469  if(status)
470  {
471  *status = st;
472  }
473  }
474  /** Block until all the tasks of the queue have been marked as finished
475  *
476  * @return Status code
477  */
479  {
480  return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
481  }
482 };
483 
484 /** Data type enumeration */
485 enum class DataType
486 {
488  UInt8 = AclUInt8,
489  Int8 = AclInt8,
490  UInt16 = AclUInt16,
491  Int16 = AclInt16,
492  UInt32 = AclUint32,
493  Int32 = AclInt32,
497 };
498 
499 /** Tensor Descriptor class
500  *
501  * Structure that contains all the required meta-data to represent a tensor
502  */
504 {
505 public:
506  /** Constructor
507  * Copies @p shape into the object and fills the C descriptor from that copy.
508  * @param[in] shape Shape of the tensor
509  * @param[in] data_type Data type of the tensor
510  */
511  TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type)
512  : _shape(shape), _data_type(data_type)
513  {
514  _cdesc.ndims = _shape.size();
515  _cdesc.shape = _shape.data(); // points into this object's own _shape; NOTE(review): an implicitly copied descriptor would keep pointing at the source's vector - confirm copies are safe
516  _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
517  _cdesc.strides = nullptr; // no explicit strides are provided by this constructor
518  _cdesc.boffset = 0;
519  }
520  /** Constructor
521  * Copies the extents of @p desc into the object and re-points the stored C descriptor at that copy.
522  * @param[in] desc C-type descriptor; its shape array only needs to stay valid for the duration of this call
523  */
524  explicit TensorDescriptor(const AclTensorDescriptor &desc)
525  {
526  _cdesc = desc;
527  _data_type = detail::as_enum<DataType>(desc.data_type);
528  if(desc.ndims > 0) // a non-positive rank leaves the owned shape empty
529  {
530  _shape.assign(desc.shape, desc.shape + desc.ndims);
531  }
532  _cdesc.shape = _shape.data(); // refer to the owned copy, as the (shape, data_type) constructor does, instead of aliasing the caller's array
533  }
534  /** Get underlying C tensor descriptor
535  *
536  * @return Pointer to the C descriptor stored inside this object; it is only valid while this object is alive
537  */
538  const AclTensorDescriptor *get() const
539  {
540  return &_cdesc;
541  }
542  /** Operator to compare two TensorDescriptor
543  *
544  * @param[in] other The instance to compare against
545  *
546  * @return True if two instances have the same shape and data type
547  */
548  bool operator==(const TensorDescriptor &other) const
549  {
550  // std::vector equality compares the number of dimensions first and then every extent
551  return _data_type == other._data_type && _shape == other._shape;
564  }
565 
566 private:
567  std::vector<int32_t> _shape{};
568  DataType _data_type{};
569  AclTensorDescriptor _cdesc{};
570 };
571 
572 /** Import memory types */
573 enum class ImportType
574 {
576 };
577 
578 /** Tensor class
579  *
580  * Tensor is a mathematical construct that can represent an N-Dimensional space.
581  *
582  * @note Maximum dimensionality support is 6 internally at the moment
583  */
584 class Tensor : public detail::ObjectBase<AclTensor_>
585 {
586 public:
587  /** Constructor
588  *
589  * @note Delegates to the four-argument constructor with allocate set to true, i.e. tensor memory is allocated
590  *
591  * @param[in] ctx Context from where the tensor will be created from
592  * @param[in] desc Tensor descriptor to be used
593  * @param[out] status Optional; receives the creation status when not nullptr
594  */
595  Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr)
596  : Tensor(ctx, desc, true, status)
597  {
598  }
599  /** Constructor
600  *
601  * @param[in] ctx Context from where the tensor will be created from
602  * @param[in] desc Tensor descriptor to be used
603  * @param[in] allocate Flag to indicate if the tensor needs to be allocated
604  * @param[out] status Status information if requested
605  */
606  Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
607  {
609  const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
610  reset(tensor);
611  report_status(st, "[Compute Library] Failed to create tensor!");
612  if(status)
613  {
614  *status = st;
615  }
616  }
617  /** Maps the backing memory of a given tensor that can be used by the host to access any contents
618  *
619  * @return A valid non-zero pointer in case of success else nullptr
620  */
621  void *map()
622  {
623  void *handle = nullptr; // stays nullptr unless AclMapTensor writes a mapping
624  const auto st = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
625  report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); // throws acl::Status when exceptions are enabled; the status code itself is not returned to the caller
626  return handle;
627  }
628  /** Unmaps tensor's memory
629  *
630  * @param[in] handle Handle to unmap (presumably a pointer obtained from map() - TODO confirm)
631  *
632  * @return Status code of the AclUnmapTensor call
633  */
634  StatusCode unmap(void *handle)
635  {
636  const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
637  report_status(st, "[Compute Library] Failed to unmap the tensor!"); // throws acl::Status on failure when exceptions are enabled
638  return st;
639  }
640  /** Import external memory to a given tensor object
641  *
642  * @param[in] handle External memory handle
643  * @param[in] type Type of memory to be imported
644  *
645  * @return Status code of the AclTensorImport call
646  */
647  StatusCode import(void *handle, ImportType type)
648  {
649  const auto st = detail::as_enum<StatusCode>(AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
650  report_status(st, "[Compute Library] Failed to import external memory to tensor!"); // throws acl::Status on failure when exceptions are enabled
651  return st;
652  }
653  /** Get the size of the tensor in bytes
654  *
655  * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
656  *
657  * @return The size of the tensor in bytes; 0 if the query did not write a size
658  */
659  uint64_t get_size()
660  {
661  uint64_t size{ 0 };
662  const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
663  report_status(st, "[Compute Library] Failed to get the size of the tensor"); // throws acl::Status on failure when exceptions are enabled
664  return size;
665  }
666  /** Get the descriptor of this tensor
667  *
668  * @return The descriptor describing the characteristics of this tensor
669  */
671  {
672  AclTensorDescriptor desc;
673  const auto st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
674  report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
675  return TensorDescriptor(desc);
676  }
677 };
678 
679 /** Tensor pack class
680  *
681  * Pack is a utility construct that is used to create a collection of tensors that can then
682  * be passed into operator as inputs.
683  */
684 class TensorPack : public detail::ObjectBase<AclTensorPack_>
685 {
686 public:
687  /** Pack pair construct: a tensor pointer together with the operator slot it is bound to */
688  struct PackPair
689  {
690  /** Constructor
691  *
692  * @param[in] tensor_ Tensor to pack
693  * @param[in] slot_id_ Slot identification of the tensor with respect to the operator
694  */
695  PackPair(Tensor *tensor_, int32_t slot_id_)
696  : tensor(tensor_), slot_id(slot_id_)
697  {
698  }
699 
700  Tensor *tensor{ nullptr }; /**< Tensor object */
701  int32_t slot_id{ AclSlotUnknown }; /**< Slot id with respect to the operator */
702  };
703 
704 public:
705  /** Constructor
706  *
707  * @param[in] ctx Context from where the tensor pack will be created from
708  * @param[out] status Status information if requested
709  */
710  explicit TensorPack(Context &ctx, StatusCode *status = nullptr)
711  {
713  const auto st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
714  reset(pack);
715  report_status(st, "[Compute Library] Failure during tensor pack creation");
716  if(status)
717  {
718  *status = st;
719  }
720  }
721  /** Add tensor to tensor pack
722  *
723  * @param[in] tensor Tensor to be added in the pack
724  * @param[in] slot_id Slot id of the tensor with respect to the operator
725  *
726  * @return Status code
727  */
728  StatusCode add(Tensor &tensor, int32_t slot_id)
729  {
730  return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id));
731  }
732  /** Add a list of tensors to a tensor pack
733  *
734  * @param[in] packed Tensor/slot pairs to be added; a null tensor pointer is forwarded as a null handle
735  *
736  * @return Status code returned by AclPackTensors
737  */
738  StatusCode add(std::initializer_list<PackPair> packed)
739  {
740  const size_t size = packed.size();
741  std::vector<int32_t> slots(size);
742  std::vector<AclTensor> tensors(size);
743  size_t i = 0;
744  for(const auto &p : packed)
745  {
746  slots[i] = p.slot_id;
747  tensors[i] = (p.tensor != nullptr) ? p.tensor->get() : nullptr; // pass the wrapped C handle; the wrapper's own address is not an AclTensor
748  ++i;
749  }
750  return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
751  }
752 };
753 
754 /** Operator class
755  *
756  * Operators are the basic algorithmic blocks responsible for performing distinct operations
757  */
758 class Operator : public detail::ObjectBase<AclOperator_>
759 {
760 public:
761  /** Run an operator on a given input list
762  *
763  * @param[in,out] queue Queue to schedule the operator on
764  * @param pack Tensor list to be used as input
765  *
766  * @return Status Code
767  */
769  {
770  return detail::as_cenum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get()));
771  }
772 
773 protected:
774  /** Constructor */
775  Operator() = default;
776 };
777 
778 /// Operators
780 class Activation : public Operator // Activation operator; the handle is created through AclActivation
781 {
782 public:
783  Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status = nullptr) // status is optional and only written when not nullptr
784  {
785  AclOperator op = nullptr; // never hand an indeterminate handle to reset() if creation fails without writing it
786  const auto st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc));
787  reset(op);
788  report_status(st, "[Compute Library] Failure during Activation operator creation"); // throws acl::Status on failure when exceptions are enabled
789  if(status)
790  {
791  *status = st;
792  }
793  }
794 };
795 } // namespace acl
796 #undef ARM_COMPUTE_IGNORE_UNUSED
797 #endif /* ARM_COMPUTE_ACL_HPP_ */
AclUnsupportedTarget
@ AclUnsupportedTarget
Call failed as an invalid backend was requested.
Definition: AclTypes.h:55
AclTensorDescriptor::boffset
int64_t boffset
Offset in terms of bytes for the first element.
Definition: AclTypes.h:197
AclActivation
AclStatus AclActivation(AclOperator *op, AclContext ctx, const AclTensorDescriptor *src, const AclTensorDescriptor *dst, const AclActivationDescriptor info)
__cplusplus
Definition: AclActivation.cpp:30
acl::Target::GpuOcl
@ GpuOcl
Gpu target that leverages OpenCL.
OBJECT_DELETER
#define OBJECT_DELETER(obj, func)
Definition: Acl.hpp:75
acl::DataType::Int8
@ Int8
AclDestroyOperator
AclStatus AclDestroyOperator(AclOperator op)
Destroy a given operator object.
Definition: AclOperator.cpp:53
AclCpu
@ AclCpu
Cpu target that uses SIMD extensions.
Definition: AclTypes.h:65
AclAllocator
Definition: AclTypes.h:98
ARM_COMPUTE_IGNORE_UNUSED
#define ARM_COMPUTE_IGNORE_UNUSED(x)
Definition: Acl.hpp:39
acl::Context::Options::Options
Options(ExecutionMode mode, AclTargetCapabilities caps, bool enable_fast_math, const char *kernel_config, int32_t max_compute_units, AclAllocator *allocator)
Constructor.
Definition: Acl.hpp:348
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
type
decltype(strategy::transforms) typedef type
Definition: gemm_interleaved.hpp:261
acl::Tensor::Tensor
Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
Constructor.
Definition: Acl.hpp:606
AclTensorDescriptor::strides
int64_t * strides
Strides on each dimension.
Definition: AclTypes.h:196
acl::TensorPack
Tensor pack class.
Definition: Acl.hpp:684
acl::Tensor::map
void * map()
Maps the backing memory of a given tensor that can be used by the host to access any contents.
Definition: Acl.hpp:621
AclGetTensorSize
AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size)
__cplusplus
Definition: AclTensor.cpp:159
AclDataTypeUnknown
@ AclDataTypeUnknown
Unknown data type.
Definition: AclTypes.h:164
acl
Definition: Acl.hpp:41
AclGetTensorDescriptor
AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc)
Get the descriptor of this tensor.
Definition: AclTensor.cpp:176
AclExhaustive
@ AclExhaustive
Exhaustive tuning mode, increased tuning time but with best results.
Definition: AclTypes.h:151
acl::TensorDescriptor
Tensor Descriptor class.
Definition: Acl.hpp:503
acl::detail::operator==
bool operator==(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Equality operator for library object.
Definition: Acl.hpp:187
AclHostPtr
@ AclHostPtr
Host allocated memory.
Definition: AclTypes.h:187
acl::Activation::Activation
Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status=nullptr)
Definition: Acl.hpp:783
acl::TensorDescriptor::TensorDescriptor
TensorDescriptor(const AclTensorDescriptor &desc)
Constructor.
Definition: Acl.hpp:524
AclUInt16
@ AclUInt16
16-bit unsigned integer
Definition: AclTypes.h:167
AclContextOptions::capabilities
AclTargetCapabilities capabilities
Target capabilities.
Definition: AclTypes.h:137
acl::Queue::Queue
Queue(Context &ctx, const Options &options=Options(), StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:463
AclRunOperator
AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors)
Eager execution of a given operator on a list of inputs and outputs.
Definition: AclOperator.cpp:31
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
acl::TensorPack::PackPair::PackPair
PackPair(Tensor *tensor_, int32_t slot_id_)
Constructor.
Definition: Acl.hpp:695
AclSuccess
@ AclSuccess
Call succeeded, leading to valid state for all involved objects/data.
Definition: AclTypes.h:51
acl::DataType::UInt8
@ UInt8
acl::TensorDescriptor::TensorDescriptor
TensorDescriptor(const std::vector< int32_t > &shape, DataType data_type)
Constructor.
Definition: Acl.hpp:511
AclPreferFastStart
@ AclPreferFastStart
Prioritize performance when a single iteration is expected to be performed.
Definition: AclTypes.h:73
acl::Target
Target
Target enum
Definition: Acl.hpp:293
acl::Context
Context class.
Definition: Acl.hpp:317
AclUnsupportedConfig
@ AclUnsupportedConfig
Call failed as configuration is unsupported.
Definition: AclTypes.h:58
acl::TensorPack::PackPair
Pack pair construct.
Definition: Acl.hpp:688
AclInvalidObjectState
@ AclInvalidObjectState
Call failed as an object has invalid state.
Definition: AclTypes.h:59
acl::detail::operator!=
bool operator!=(const ObjectBase< T > &lhs, const ObjectBase< T > &rhs)
Inequality operator for library object.
Definition: Acl.hpp:202
acl::Operator
Operator class.
Definition: Acl.hpp:758
acl::ExecutionMode
ExecutionMode
Definition: Acl.hpp:300
acl::detail::as_cenum
constexpr E as_cenum(SE v) noexcept
Convert a strongly typed enum to an old plain c enum.
Definition: Acl.hpp:104
acl::ExecutionMode::FastRerun
@ FastRerun
Prefer minimum latency in consecutive runs, might introduce higher startup times.
AclMapTensor
AclStatus AclMapTensor(AclTensor tensor, void **handle)
Map a tensor's backing memory to the host.
Definition: AclTensor.cpp:98
AclCreateTensorPack
AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx)
Creates a tensor pack.
Definition: AclTensorPack.cpp:50
acl::Context::Options::copts
AclContextOptions copts
Definition: Acl.hpp:363
acl::StatusCode::UnsupportedConfig
@ UnsupportedConfig
AclContextOptions::kernel_config_file
const char * kernel_config_file
Kernel configuration file.
Definition: AclTypes.h:139
AclDestroyQueue
AclStatus AclDestroyQueue(AclQueue queue)
Destroy a given queue object.
Definition: AclQueue.cpp:87
acl::DataType
DataType
Definition: Acl.hpp:485
acl::Operator::run
StatusCode run(Queue &queue, TensorPack &pack)
Run an operator on a given input list.
Definition: Acl.hpp:768
AclRapid
@ AclRapid
Fast tuning mode, testing a small portion of the tuning space.
Definition: AclTypes.h:149
AclDestroyContext
AclStatus AclDestroyContext(AclContext ctx)
Destroy a given context object.
Definition: AclContext.cpp:107
acl::DataType::Float16
@ Float16
AclOperator_
Definition: IOperator.h:37
AclUnmapTensor
AclStatus AclUnmapTensor(AclTensor tensor, void *handle)
Unmap the tensor's backing memory.
Definition: AclTensor.cpp:117
AclOutOfMemory
@ AclOutOfMemory
Call failed due to failure to allocate resources.
Definition: AclTypes.h:53
AclContextOptions
Definition: AclTypes.h:134
AclCreateQueue
AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options)
Create an operator queue.
Definition: AclQueue.cpp:45
acl::Tensor::Tensor
Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:595
acl::Queue::Options::Options
Options()
Default Constructor.
Definition: Acl.hpp:427
acl::Tensor::get_size
uint64_t get_size()
Get the size of the tensor in byte.
Definition: Acl.hpp:659
acl::detail::ObjectBase::operator=
ObjectBase< T > & operator=(const ObjectBase< T > &)=default
Copy assignment operator.
acl::detail::ObjectBase::reset
void reset(T *val)
Reset object value.
Definition: Acl.hpp:148
AclDestroyTensor
AclStatus AclDestroyTensor(AclTensor tensor)
Destroy a given tensor object.
Definition: AclTensor.cpp:145
AclInt32
@ AclInt32
32-bit signed integer
Definition: AclTypes.h:170
arm_compute::test::validation::shape
shape
Definition: DFT.cpp:115
AclContextOptions::mode
AclExecutionMode mode
Execution mode to use.
Definition: AclTypes.h:136
AclQueueFinish
AclStatus AclQueueFinish(AclQueue queue)
Wait until all elements on the queue have been completed.
Definition: AclQueue.cpp:72
acl::TensorPack::add
StatusCode add(std::initializer_list< PackPair > packed)
Add a list of tensors to a tensor pack.
Definition: Acl.hpp:738
acl::Tensor
Tensor class.
Definition: Acl.hpp:584
AclInvalidTarget
@ AclInvalidTarget
Call failed as invalid argument was passed.
Definition: AclTypes.h:56
AclFloat16
@ AclFloat16
16-bit floating point
Definition: AclTypes.h:171
AclTensorDescriptor::data_type
AclDataType data_type
Tensor Data type.
Definition: AclTypes.h:195
acl::TuningMode::Normal
@ Normal
AclDestroyTensorPack
AclStatus AclDestroyTensorPack(AclTensorPack pack)
Destroy a given tensor pack object.
Definition: AclTensorPack.cpp:94
acl::StatusCode::InvalidArgument
@ InvalidArgument
AclUInt8
@ AclUInt8
8-bit unsigned integer
Definition: AclTypes.h:165
acl::StatusCode::InvalidTarget
@ InvalidTarget
AclPackTensors
AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors)
A list of tensors to a tensor pack.
Definition: AclTensorPack.cpp:80
AclPreferFastRerun
@ AclPreferFastRerun
Prioritize performance when multiple iterations are performed.
Definition: AclTypes.h:72
acl::Queue::Options::opts
AclQueueOptions opts
Definition: Acl.hpp:439
AclCpuCapabilitiesAuto
@ AclCpuCapabilitiesAuto
Automatic discovery of capabilities.
Definition: AclTypes.h:79
acl::TensorPack::PackPair::slot_id
int32_t slot_id
Slot id with respect to the operator.
Definition: Acl.hpp:701
acl::Queue::Options
Queue options.
Definition: Acl.hpp:420
acl::TuningMode
TuningMode
Definition: Acl.hpp:399
acl::Context::Context
Context(Target target, const Options &options, StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:385
AclTargetCapabilities
uint64_t AclTargetCapabilities
Bitfield of target capabilities.
Definition: AclTypes.h:46
acl::DataType::BFloat16
@ BFloat16
acl::StatusCode::UnsupportedTarget
@ UnsupportedTarget
AclFloat32
@ AclFloat32
32-bit floating point
Definition: AclTypes.h:173
arm_compute::test::validation::pack
ITensorPack pack
Definition: Im2Col.cpp:188
AclRuntimeError
@ AclRuntimeError
Call failed during execution.
Definition: AclTypes.h:52
acl::detail::ObjectDeleter
Construct to handle destruction of objects.
Definition: Acl.hpp:71
acl::Queue::Options::Options
Options(TuningMode mode, int32_t compute_units)
Constructor.
Definition: Acl.hpp:434
tensor
CLTensor * tensor
Pointer to the auxiliary tensor.
Definition: ClWorkloadRuntime.cpp:66
acl::Tensor::unmap
StatusCode unmap(void *handle)
Unmaps tensor's memory.
Definition: Acl.hpp:634
acl::Activation
Definition: Acl.hpp:780
AclInt8
@ AclInt8
8-bit signed integer
Definition: AclTypes.h:166
AclPackTensor
AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id)
Add a tensor to a tensor pack.
Definition: AclTensorPack.cpp:70
AclBFloat16
@ AclBFloat16
16-bit brain floating point
Definition: AclTypes.h:172
acl::Queue::Queue
Queue(Context &ctx, StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:451
acl::StatusCode::InvalidObjectState
@ InvalidObjectState
Acl.h
arm_compute::test::validation::data_type
data_type
Definition: Cast.cpp:223
AclTuningModeNone
@ AclTuningModeNone
No tuning.
Definition: AclTypes.h:148
acl::Context::Options
Context options.
Definition: Acl.hpp:321
acl::DataType::Float32
@ Float32
AclTensor
struct AclTensor_ * AclTensor
Opaque Tensor object.
Definition: AclTypes.h:39
acl::detail::ObjectBase::get
const T * get() const
Access underlying object.
Definition: Acl.hpp:156
AclInvalidArgument
@ AclInvalidArgument
Call failed as an invalid argument was passed.
Definition: AclTypes.h:57
acl::StatusCode::RuntimeError
@ RuntimeError
AclTensor_
Definition: ITensorV2.h:30
acl::detail::ObjectBase::~ObjectBase
~ObjectBase()=default
Destructor.
AclGpuOcl
@ AclGpuOcl
OpenCL target for GPU.
Definition: AclTypes.h:66
acl::TensorPack::add
StatusCode add(Tensor &tensor, int32_t slot_id)
Add tensor to tensor pack.
Definition: Acl.hpp:728
acl::ImportType
ImportType
Import memory types.
Definition: Acl.hpp:573
acl::DataType::Int16
@ Int16
acl::StatusCode
StatusCode
Status code enum.
Definition: Acl.hpp:50
AclTensorImport
AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type)
Import external memory to a given tensor object.
Definition: AclTensor.cpp:131
acl::TensorDescriptor::get
const AclTensorDescriptor * get() const
Get underlying C tensor descriptor.
Definition: Acl.hpp:538
acl::Target::Cpu
@ Cpu
Cpu target that leverages SIMD.
AclTensorDescriptor
Definition: AclTypes.h:191
acl::ExecutionMode::FastStart
@ FastStart
Prefer minimizing startup time.
AclCreateTensor
AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate)
Create a Tensor object.
Definition: AclTensor.cpp:69
acl::TuningMode::Rapid
@ Rapid
acl::Queue::finish
StatusCode finish()
Block until all the tasks of the queue have been marked as finished.
Definition: Acl.hpp:478
acl::TensorDescriptor::operator==
bool operator==(const TensorDescriptor &other)
Operator to compare two TensorDescriptors.
Definition: Acl.hpp:548
acl::Context::Options::num_threads_auto
static constexpr int32_t num_threads_auto
Allow runtime to specify number of threads.
Definition: Acl.hpp:323
AclCreateContext
AclStatus AclCreateContext(AclContext *ctx, AclTarget target, const AclContextOptions *options)
Create a context object.
Definition: AclContext.cpp:80
AclTensorDescriptor::shape
int32_t * shape
Tensor Shape.
Definition: AclTypes.h:194
acl::TensorPack::TensorPack
TensorPack(Context &ctx, StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:710
clang_tidy_rules.mode
mode
Definition: clang_tidy_rules.py:196
AclQueue_
Definition: IQueue.h:29
acl::Context::Options::Options
Options()
Default Constructor.
Definition: Acl.hpp:330
acl::ImportType::Host
@ Host
AclNormal
@ AclNormal
Normal tuning mode, gives a good balance between tuning time and performance.
Definition: AclTypes.h:150
AclTensorPack_
Definition: TensorPack.h:30
acl::DataType::Int32
@ Int32
AclSlotUnknown
@ AclSlotUnknown
Definition: AclTypes.h:203
AclTensorDescriptor::ndims
int32_t ndims
Number of dimensions.
Definition: AclTypes.h:193
acl::DataType::UInt16
@ UInt16
acl::StatusCode::Unimplemented
@ Unimplemented
acl::Tensor::get_descriptor
TensorDescriptor get_descriptor()
Get the descriptor of this tensor.
Definition: Acl.hpp:670
acl::detail::ObjectBase
Object base class for library objects.
Definition: Acl.hpp:131
AclContextOptions::allocator
AclAllocator * allocator
Allocator to be used for all internal memory allocations.
Definition: AclTypes.h:142
AclContext_
Definition: IContext.h:34
AclUint32
@ AclUint32
32-bit unsigned integer
Definition: AclTypes.h:169
AclInt16
@ AclInt16
16-bit signed integer
Definition: AclTypes.h:168
acl::StatusCode::OutOfMemory
@ OutOfMemory
acl::TensorPack::PackPair::tensor
Tensor * tensor
Tensor object.
Definition: Acl.hpp:700
acl::TuningMode::Exhaustive
@ Exhaustive
acl::detail::as_enum
constexpr SE as_enum(E val) noexcept
Convert plain old enumeration to a strongly typed enum.
Definition: Acl.hpp:119
AclQueueOptions
Definition: AclTypes.h:155
AclUnimplemented
@ AclUnimplemented
Call failed as requested capability is not implemented.
Definition: AclTypes.h:54
acl::StatusCode::Success
@ Success
AclContextOptions::enable_fast_math
bool enable_fast_math
Allow precision loss.
Definition: AclTypes.h:138
AclActivationDescriptor
Definition: AclDescriptors.h:52
acl::DataType::Unknown
@ Unknown
acl::detail::ObjectBase::get
T * get()
Access underlying object.
Definition: Acl.hpp:164
acl::Queue
Queue class.
Definition: Acl.hpp:416
acl::Context::Context
Context(Target target, StatusCode *status=nullptr)
Constructor.
Definition: Acl.hpp:375
acl::DataType::UInt32
@ UInt32
arm_compute::test::validation::allocator
input allocator() -> allocate()
AclContextOptions::max_compute_units
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.
Definition: AclTypes.h:140