Compute Library
 22.02
CpuContext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/cpu/CpuContext.h"
25 
27 #include "src/cpu/CpuQueue.h"
28 #include "src/cpu/CpuTensor.h"
29 
30 #include <cstdlib>
31 #if !defined(__APPLE__) && !defined(__OpenBSD__)
32 #include <malloc.h>
33 #endif // !defined(__APPLE__) && !defined(__OpenBSD__)
34 
35 
36 namespace arm_compute
37 {
38 namespace cpu
39 {
40 namespace
41 {
42 void *default_allocate(void *user_data, size_t size)
43 {
44  ARM_COMPUTE_UNUSED(user_data);
45  return ::operator new(size);
46 }
47 void default_free(void *user_data, void *ptr)
48 {
49  ARM_COMPUTE_UNUSED(user_data);
50  ::operator delete(ptr);
51 }
52 void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
53 {
54  ARM_COMPUTE_UNUSED(user_data);
55  void *ptr = nullptr;
56 #if defined(BARE_METAL)
57  size_t rem = size % alignment;
58  size_t real_size = (rem) ? (size + alignment - rem) : size;
59  ptr = memalign(alignment, real_size);
60 #else /* defined(BARE_METAL) */
61  if(posix_memalign(&ptr, alignment, size) != 0)
62  {
63  // posix_memalign returns non-zero on failures, the return values will be
64  // - EINVAL: wrong alignment
65  // - ENOMEM: insufficient memory
66  ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
67  }
68 #endif /* defined(BARE_METAL) */
69  return ptr;
70 }
71 void default_aligned_free(void *user_data, void *ptr)
72 {
73  ARM_COMPUTE_UNUSED(user_data);
74  free(ptr);
75 }
76 static AclAllocator default_allocator = { &default_allocate,
77  &default_free,
78  &default_aligned_allocate,
79  &default_aligned_free,
80  nullptr
81  };
82 
83 AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
84 {
85  bool is_valid = (external_allocator != nullptr);
86  if(is_valid)
87  {
88  is_valid = is_valid && (external_allocator->alloc != nullptr);
89  is_valid = is_valid && (external_allocator->free != nullptr);
90  is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
91  is_valid = is_valid && (external_allocator->aligned_free != nullptr);
92  }
93  return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
94 }
95 
96 cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
97 {
98  cpuinfo::CpuIsaInfo isa_caps;
99 
100  // Extract SIMD extension
101  isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
102  isa_caps.sve = external_caps & AclCpuCapabilitiesSve;
103  isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
104 
105  // Extract data-type support
106  isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
107  isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
108  isa_caps.svebf16 = isa_caps.bf16;
109 
110  // Extract ISA extensions
111  isa_caps.dot = external_caps & AclCpuCapabilitiesDot;
112  isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8;
113  isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
114 
115  return isa_caps;
116 }
117 
118 CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
119  int32_t max_threads)
120 {
121  CpuCapabilities caps;
122 
123  // Populate capabilities with system information
124  caps.cpu_info = cpuinfo::CpuInfo::build();
125  if(external_caps != AclCpuCapabilitiesAuto)
126  {
127  cpuinfo::CpuIsaInfo isa = populate_capabilities_flags(external_caps);
128  auto cpus = caps.cpu_info.cpus();
129 
130  caps.cpu_info = cpuinfo::CpuInfo(isa, cpus);
131  }
132 
133  // Set max number of threads
134 #if defined(BARE_METAL)
135  ARM_COMPUTE_UNUSED(max_threads);
136  caps.max_threads = 1;
137 #else /* defined(BARE_METAL) */
138  caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
139 #endif /* defined(BARE_METAL) */
140 
141  return caps;
142 }
143 } // namespace
144 
146  : IContext(Target::Cpu),
147  _allocator(default_allocator),
148  _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
149 {
150  if(options != nullptr)
151  {
152  _allocator = populate_allocator(options->allocator);
153  _caps = populate_capabilities(options->capabilities, options->max_compute_units);
154  }
155 }
156 
158 {
159  return _caps;
160 }
161 
163 {
164  return _allocator;
165 }
166 
168 {
169  CpuTensor *tensor = new CpuTensor(this, desc);
170  if(tensor != nullptr && allocate)
171  {
172  tensor->allocate();
173  }
174  return tensor;
175 }
176 
178 {
179  return new CpuQueue(this, options);
180 }
181 } // namespace cpu
182 } // namespace arm_compute
static CpuInfo build()
CpuInfo builder function from system related information.
Definition: CpuInfo.cpp:298
Base class specifying the queue interface.
Definition: IQueue.h:41
Enable bfloat16 data-type support.
Definition: AclTypes.h:87
Structure that encodes the CPU capabilities to be used.
Definition: CpuContext.h:36
Base class specifying the tensor interface.
Definition: ITensorV2.h:45
void(* free)(void *user_data, void *ptr)
Release a block of size bytes of memory.
Definition: AclTypes.h:113
Automatic discovery of capabilities.
Definition: AclTypes.h:79
Enable SVE optimized paths.
Definition: AclTypes.h:82
int32_t max_compute_units
Max compute units that can be used by a queue created from the context.
Definition: AclTypes.h:140
#define ARM_COMPUTE_LOG_ERROR_ACL(msg)
Log an error message to the logger.
Definition: Log.h:77
Enable paths that use the mmla integer instructions.
Definition: AclTypes.h:91
Context interface.
Definition: IContext.h:50
Enable NEON optimized paths.
Definition: AclTypes.h:81
Copyright (c) 2017-2021 Arm Limited.
AclTargetCapabilities capabilities
Target capabilities.
Definition: AclTypes.h:137
uint64_t AclTargetCapabilities
Bitmask type encoding the capabilities of the target.
Definition: AclTypes.h:46
void *(* alloc)(void *user_data, size_t size)
Allocate a block of size bytes of memory.
Definition: AclTypes.h:107
AllocatorWrapper & allocator()
Backing memory allocator accessor.
Definition: CpuContext.cpp:162
StatusCode allocate()
Allocates tensor.
Definition: CpuTensor.cpp:52
void *(* aligned_alloc)(void *user_data, size_t size, size_t alignment)
Allocate a block of size bytes of memory.
Definition: AclTypes.h:121
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
CpuContext(const AclContextOptions *options)
Default Constructor.
Definition: CpuContext.cpp:145
const CpuCapabilities & capabilities() const
Cpu Capabilities accessor.
Definition: CpuContext.cpp:157
Enable SVE2 optimized paths.
Definition: AclTypes.h:83
CPU tensor implementation class.
Definition: CpuTensor.h:36
CPU queue implementation class.
Definition: CpuQueue.h:36
void(* aligned_free)(void *user_data, void *ptr)
Release a block of memory previously allocated with aligned_alloc.
Definition: AclTypes.h:127
IQueue * create_queue(const AclQueueOptions *options) override
Create a queue object.
Definition: CpuContext.cpp:177
Default malloc allocator implementation.
Enable paths that use the mmla float instructions.
Definition: AclTypes.h:92
ITensorV2 * create_tensor(const AclTensorDescriptor &desc, bool allocate) override
Create a tensor object.
Definition: CpuContext.cpp:167
Enable float16 data-type support.
Definition: AclTypes.h:86
AclAllocator * allocator
Allocator to be used by all the memory internally.
Definition: AclTypes.h:142
Enable paths that use the udot/sdot instructions.
Definition: AclTypes.h:90