24.02.1
|
Go to the documentation of this file.
32 #if !defined(__APPLE__) && !defined(__OpenBSD__)
36 #define posix_memalign _aligned_realloc
37 #define posix_memalign_free _aligned_free
38 #endif // defined(_WIN64)
39 #endif // !defined(__APPLE__) && !defined(__OpenBSD__)
51 void *default_allocate(
void *user_data,
size_t size)
54 return ::operator
new(size);
56 void default_free(
void *user_data,
void *ptr)
59 ::operator
delete(ptr);
61 void *default_aligned_allocate(
void *user_data,
size_t size,
size_t alignment)
65 #if defined(BARE_METAL)
66 size_t rem = size % alignment;
67 size_t real_size = (rem) ? (size + alignment - rem) : size;
68 ptr = memalign(alignment, real_size);
70 if (posix_memalign(&ptr, alignment, size) != 0)
80 void default_aligned_free(
void *user_data,
void *ptr)
85 static AclAllocator default_allocator = {&default_allocate, &default_free, &default_aligned_allocate,
86 &default_aligned_free,
nullptr};
88 AllocatorWrapper populate_allocator(
AclAllocator *external_allocator)
90 bool is_valid = (external_allocator !=
nullptr);
98 return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
103 cpuinfo::CpuIsaInfo isa_caps;
113 isa_caps.svebf16 = isa_caps.bf16;
125 CpuCapabilities caps;
131 cpuinfo::CpuIsaInfo
isa = populate_capabilities_flags(external_caps);
132 auto cpus = caps.cpu_info.cpus();
134 caps.cpu_info = cpuinfo::CpuInfo(
isa, cpus);
138 #if defined(BARE_METAL)
140 caps.max_threads = 1;
142 caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
152 if (options !=
nullptr)
154 _allocator = populate_allocator(options->
allocator);
172 if (
tensor !=
nullptr && allocate)
Base class specifying the tensor interface.
CPU tensor implementation class.
AclTargetCapabilities capabilities
Target capabilities.
@ AclCpuCapabilitiesMmlaFp
Enable paths that use the mmla float instructions.
@ AclCpuCapabilitiesDot
Enable paths that use the udot/sdot instructions.
void *(* alloc)(void *user_data, size_t size)
Allocate a block of size bytes of memory.
void(* aligned_free)(void *user_data, void *ptr)
Release a block of memory previously obtained from aligned_alloc.
#define ARM_COMPUTE_LOG_ERROR_ACL(msg)
IQueue * create_queue(const AclQueueOptions *options) override
Create a queue object.
@ AclCpuCapabilitiesSve2
Enable SVE2 optimized paths.
@ AclCpuCapabilitiesMmlaInt8
Enable paths that use the mmla integer instructions.
Base class specifying the queue interface.
Structure that encodes the CPU capabilities to be used.
ITensorV2 * create_tensor(const AclTensorDescriptor &desc, bool allocate) override
Create a tensor object.
static CpuInfo build()
CpuInfo builder function from system related information.
@ AclCpuCapabilitiesFp16
Enable float16 data-type support.
@ AclCpuCapabilitiesBf16
Enable bfloat16 data-type support.
void *(* aligned_alloc)(void *user_data, size_t size, size_t alignment)
Allocate a block of size bytes of memory aligned to the requested alignment.
@ AclCpuCapabilitiesAuto
Automatic discovery of capabilities.
uint64_t AclTargetCapabilities
Type used to store the target capabilities.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
CLTensor * tensor
Pointer to the auxiliary tensor.
Default malloc allocator implementation.
void(* free)(void *user_data, void *ptr)
Release a previously allocated block of memory.
const CpuCapabilities & capabilities() const
Cpu Capabilities accessor.
Copyright (c) 2017-2024 Arm Limited.
CPU queue implementation class.
@ AclCpuCapabilitiesSve
Enable SVE optimized paths.
AclAllocator * allocator
Allocator to be used by all the memory internally.
CpuContext(const AclContextOptions *options)
Constructor taking the context options.
AllocatorWrapper & allocator()
Backing memory allocator accessor.
@ AclCpuCapabilitiesNeon
Enable NEON optimized paths.
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.