37 #if !defined(BARE_METAL) 43 #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 47 #include <asm/hwcap.h> 53 #define HWCAP_ASIMDHP (1 << 10) // NOLINT 57 #define HWCAP_CPUID (1 << 11) // NOLINT 61 #define HWCAP_ASIMDDP (1 << 20) // NOLINT 68 #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 70 bool model_supports_dot(
CPUModel model)
83 bool model_supports_fp16(
CPUModel model)
98 CPUModel midr_to_model(
const unsigned int midr)
103 const int implementer = (midr >> 24) & 0xFF;
104 const int variant = (midr >> 20) & 0xF;
105 const int cpunum = (midr >> 4) & 0xFFF;
107 if(implementer == 0x41)
153 else if(implementer == 0x48)
170 void populate_models_cpuid(std::vector<CPUModel> &cpusv)
176 std::stringstream
str;
177 str <<
"/sys/devices/system/cpu/cpu" << i++ <<
"/regs/identification/midr_el1";
179 file.open(str.str(), std::ios::in);
183 if(
bool(getline(file, line)))
186 c = midr_to_model(midr & 0xffffffff);
192 void populate_models_cpuinfo(std::vector<CPUModel> &cpusv)
200 memset(&proc_regex, 0,
sizeof(regex_t));
201 memset(&imp_regex, 0,
sizeof(regex_t));
202 memset(&var_regex, 0,
sizeof(regex_t));
203 memset(&part_regex, 0,
sizeof(regex_t));
204 memset(&rev_regex, 0,
sizeof(regex_t));
208 ret_status |= regcomp(&proc_regex, R
"(^processor.*([[:digit:]]+)$)", REG_EXTENDED); 209 ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED); 210 ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED); 211 ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED); 212 ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED); 217 file.open(
"/proc/cpuinfo", std::ios::in);
225 while(
bool(getline(file, line)))
227 std::array<regmatch_t, 2> match;
228 ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
231 std::string
id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
234 if(curcpu >= 0 && midr == 0)
242 cpusv[curcpu] = midr_to_model(midr);
251 ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
254 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
256 midr |= (impv << 24);
261 ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
264 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
266 midr |= (varv << 20);
271 ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
274 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
276 midr |= (partv << 4);
281 ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
284 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
295 cpusv[curcpu] = midr_to_model(midr);
300 regfree(&proc_regex);
303 regfree(&part_regex);
310 std::ifstream CPUspresent;
311 CPUspresent.open(
"/sys/devices/system/cpu/present", std::ios::in);
312 bool success =
false;
314 if(CPUspresent.is_open())
318 if(
bool(getline(CPUspresent, line)))
325 auto startfrom = line.begin();
327 for(
auto i = line.begin(); i < line.end(); ++i)
329 if(*i ==
'-' || *i ==
',')
335 line.erase(line.begin(), startfrom);
345 max_cpus = std::thread::hardware_concurrency();
361 #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 363 bool hwcaps_fp16_support =
false;
364 bool hwcaps_dot_support =
false;
366 const uint32_t hwcaps = getauxval(AT_HWCAP);
375 hwcaps_fp16_support =
true;
378 #if defined(__aarch64__) 381 hwcaps_dot_support =
true;
385 const unsigned int max_cpus = get_max_cpus();
390 populate_models_cpuid(percpu);
394 populate_models_cpuinfo(percpu);
399 bool one_supports_dot =
false;
400 bool one_supports_fp16 =
false;
401 for(
const auto &v : percpu)
403 one_supports_dot = one_supports_dot || model_supports_dot(v);
404 one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
407 cpuinfo.
set_dotprod(one_supports_dot || hwcaps_dot_support);
408 cpuinfo.
set_fp16(one_supports_fp16 || hwcaps_fp16_support);
416 unsigned int num_threads_hint = 1;
418 #if !defined(BARE_METAL) 419 std::map<std::string, unsigned int> cpu_part_occurrence_map;
422 regex_t cpu_part_rgx;
423 memset(&cpu_part_rgx, 0,
sizeof(regex_t));
424 int ret_status = regcomp(&cpu_part_rgx, R
"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED); 429 std::ifstream cpuinfo;
430 cpuinfo.open(
"/proc/cpuinfo", std::ios::in);
431 if(cpuinfo.is_open())
434 while(
bool(getline(cpuinfo, line)))
436 std::array<regmatch_t, 2> match;
437 ret_status = regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0);
440 std::string cpu_part = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
441 if(cpu_part_occurrence_map.find(cpu_part) != cpu_part_occurrence_map.end())
443 cpu_part_occurrence_map[cpu_part]++;
447 cpu_part_occurrence_map[cpu_part] = 1;
452 regfree(&cpu_part_rgx);
455 auto min_common_cores = std::min_element(cpu_part_occurrence_map.begin(), cpu_part_occurrence_map.end(),
456 [](
const std::pair<std::string, unsigned int> &p1,
const std::pair<std::string, unsigned int> &p2)
458 return p1.second < p2.second;
462 num_threads_hint = cpu_part_occurrence_map.empty() ? std::thread::hardware_concurrency() : min_common_cores->second;
465 return num_threads_hint;
unsigned long stoul(const std::string &str, std::size_t *pos=0, NumericBase base=NumericBase::BASE_10)
Convert string values to unsigned long.
CPUModel
CPU models - we only need to detect CPUs we have microarchitecture-specific code for.
Copyright (c) 2017-2021 Arm Limited.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
void set_cpu_model(unsigned int cpuid, CPUModel model)
Set the cpumodel for a given cpu core.
int stoi(const std::string &str, std::size_t *pos=0, NumericBase base=NumericBase::BASE_10)
Convert string values to integer.
void set_dotprod(const bool dotprod)
Set dot product support.
void get_cpu_configuration(CPUInfo &cpuinfo)
This function will try to detect the CPU configuration on the system and will fill the cpuinfo object...
unsigned int get_threads_hint()
Some systems have both big and small cores; this function computes the minimum number of cores that ar...
void set_fp16(const bool fp16)
Set fp16 support.
void set_cpu_num(unsigned int cpu_count)
Set max number of cpus.