// Queries the Mali device node at `path` and fills a MaliHWInfo record.
// Visible sequence: open the node read/write, issue a version-check ioctl,
// issue a set-flags ioctl, issue a GPU-properties register-dump ioctl, then
// copy product id / revisions and derive the core mask and core count.
//
// NOTE(review): this listing is sparse (the embedded line numbers jump, e.g.
// 47 -> 55); braces, the bodies of the `if` checks, the declaration of
// `hw_info` and the return statement are not visible here.
// NOTE(review): no close(fd) is visible in this listing - confirm the
// descriptor is released on every path.
45 MaliHWInfo get_mali_hw_info(
const char *
path)
47 int fd = open(
path, O_RDWR);
// Version handshake: the request carries major 10, minor 2.
// NOTE(review): unlike `flags` below, this struct is not zeroed in the
// visible lines - confirm no other field needs initialising.
55 mali_userspace::uku_version_check_args version_check_args;
56 version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION;
57 version_check_args.major = 10;
58 version_check_args.minor = 2;
// Non-zero ioctl result is the failure condition; its handling is not visible.
60 if(mali_userspace::mali_ioctl(fd, version_check_args) != 0)
// Context flags request: zeroed first, then id and create_flags are set.
68 mali_userspace::kbase_uk_hwcnt_reader_set_flags flags;
69 memset(&flags, 0,
sizeof(flags));
70 flags.header.id = mali_userspace::KBASE_FUNC_SET_FLAGS;
71 flags.create_flags = mali_userspace::BASE_CONTEXT_CREATE_KERNEL_FLAGS;
73 if(mali_userspace::mali_ioctl(fd, flags) != 0)
// GPU properties dump; only header.id is set before the call in the visible lines.
81 mali_userspace::kbase_uk_gpuprops props;
82 props.header.id = mali_userspace::KBASE_FUNC_GPU_PROPS_REG_DUMP;
// NOTE(review): this call is unqualified, unlike the two above; presumably it
// resolves to mali_userspace::mali_ioctl through argument-dependent lookup - confirm.
84 if(mali_ioctl(fd, props) != 0)
// Start from an all-zero result, then copy the identification fields.
91 memset(&hw_info, 0,
sizeof(hw_info));
92 hw_info.gpu_id = props.props.core_props.product_id;
93 hw_info.r_value = props.props.core_props.major_revision;
94 hw_info.p_value = props.props.core_props.minor_revision;
// OR together the core mask of every reported core group.
96 for(
unsigned int i = 0; i < props.props.coherency_info.num_core_groups; ++i)
98 hw_info.core_mask |= props.props.coherency_info.group[i].core_mask;
// Core count is the number of set bits in the combined mask.
101 hw_info.mp_count = __builtin_popcountll(hw_info.core_mask);
// Initialiser entries keyed by hardware counter name. Each entry supplies a
// human-readable label, an empty std::map<int, uint64_t>, and a unit string
// ("instructions" or "cycles").
// NOTE(review): the container being initialised and the enclosing function
// are not visible in this listing; presumably this is the per-core counter
// table read elsewhere via `_core_counters` - confirm.
119 {
"ARITH_WORDS", {
"Arithmetic pipe", std::map<int, uint64_t>(),
"instructions" } },
120 {
"LS_ISSUE", {
"LS pipe", std::map<int, uint64_t>(),
"instructions" } },
121 {
"TEX_ISSUE", {
"Texture pipe", std::map<int, uint64_t>(),
"instructions" } },
122 {
"COMPUTE_ACTIVE", {
"Compute core", std::map<int, uint64_t>(),
"cycles" } },
123 {
"FRAG_ACTIVE", {
"Fragment core", std::map<int, uint64_t>(),
"cycles" } },
// Two alternative divisors for reported values: one thousand or one million.
// NOTE(review): the conditions selecting each assignment are not visible here.
133 _scale_factor = 1000;
137 _scale_factor = 1000000;
// Prepares the hardware counter reader. Visible sequence:
//   1. query hardware info for `_device` and record the core count;
//   2. open `_device` and run a reader version check (major must not be < 10);
//   3. set context flags and set up the counter reader;
//   4. check the reader API version, then fetch buffer size and hardware version;
//   5. mmap the sample buffers and pick the counter-name table for this product;
//   6. size the raw counter buffer and build the core index remap table.
//
// NOTE(review): this listing is sparse; braces, the bodies of every failure
// branch, and some statements (e.g. the assignment of `_hwc_fd`) are not visible.
152 void MaliCounter::init()
156 MaliHWInfo hw_info = get_mali_hw_info(_device);
158 _num_cores = hw_info.mp_count;
// Device node opened read/write, close-on-exec, non-blocking.
160 _fd = open(_device, O_RDWR | O_CLOEXEC | O_NONBLOCK);
// Version check request is fully zeroed before the ioctl.
168 mali_userspace::kbase_uk_hwcnt_reader_version_check_args check;
169 memset(&check, 0,
sizeof(check));
171 if(mali_userspace::mali_ioctl(_fd, check) != 0)
// A reported major version below 10 takes a separate branch (body not visible).
175 else if(check.major < 10)
182 mali_userspace::kbase_uk_hwcnt_reader_set_flags flags;
183 memset(&flags, 0,
sizeof(flags));
184 flags.header.id = mali_userspace::KBASE_FUNC_SET_FLAGS;
185 flags.create_flags = mali_userspace::BASE_CONTEXT_CREATE_KERNEL_FLAGS;
187 if(mali_userspace::mali_ioctl(_fd, flags) != 0)
// Reader setup: buffer count plus counter bitmasks. -1 sets every bit of the
// shader and MMU/L2 masks. Lines 198 and 200 of the listing are not visible;
// presumably they set the remaining bitmasks - confirm.
194 mali_userspace::kbase_uk_hwcnt_reader_setup setup;
195 memset(&setup, 0,
sizeof(setup));
196 setup.header.id = mali_userspace::KBASE_FUNC_HWCNT_READER_SETUP;
197 setup.buffer_count = _buffer_count;
199 setup.shader_bm = -1;
201 setup.mmu_l2_bm = -1;
204 if(mali_userspace::mali_ioctl(_fd, setup) != 0)
// Seeded with the bitwise complement of the expected value, so the variable
// cannot equal HWCNT_READER_API unless the ioctl below writes it.
213 uint32_t api_version = ~mali_userspace::HWCNT_READER_API;
215 if(ioctl(_hwc_fd, mali_userspace::KBASE_HWCNT_READER_GET_API_VERSION, &api_version) != 0)
219 else if(api_version != mali_userspace::HWCNT_READER_API)
// Reader queries that fill `_buffer_size` and `_hw_ver`.
225 if(ioctl(_hwc_fd,
static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_BUFFER_SIZE), &_buffer_size) != 0)
230 if(ioctl(_hwc_fd,
static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_HWVER), &_hw_ver) != 0)
// Map all sample buffers (_buffer_count * _buffer_size bytes) read-only and
// private, from offset 0 of the reader descriptor.
240 _sample_data =
static_cast<uint8_t *
>(mmap(
nullptr, _buffer_count * _buffer_size, PROT_READ, MAP_PRIVATE, _hwc_fd, 0));
// NOTE(review): mmap failure is reported as MAP_FAILED, not nullptr; the
// handling in this branch is not visible - confirm `_sample_data` is not left
// holding MAP_FAILED.
242 if(_sample_data == MAP_FAILED)
// Find the first product entry whose masked GPU id equals its product id.
247 auto product = std::find_if(std::begin(mali_userspace::products),
std::end(mali_userspace::products), [&](
const mali_userspace::CounterMapping & cm)
249 return (cm.product_mask & hw_info.gpu_id) == cm.product_id;
// On a match, adopt that product's counter-name lookup table (no-match path not visible).
252 if(product !=
std::end(mali_userspace::products))
254 _names_lut = product->names_lut;
// One uint32_t slot per 4 bytes of a sample buffer.
261 _raw_counter_buffer.resize(_buffer_size /
sizeof(uint32_t));
// Rebuild the remap table from the core mask: each iteration takes the index
// of the lowest set bit, appends it, and clears that bit. The loop header is
// not visible in this listing.
264 _core_index_remap.clear();
265 _core_index_remap.reserve(hw_info.mp_count);
// NOTE(review): the mask is held in an `unsigned int`; if MaliHWInfo::core_mask
// is wider (its type is not visible here), upper bits are dropped - confirm.
267 unsigned int mask = hw_info.core_mask;
271 unsigned int bit = __builtin_ctz(mask);
272 _core_index_remap.push_back(bit);
273 mask &= ~(1u << bit);
// Releases the mapped sample buffers: when `_sample_data` is non-null, unmap
// `_buffer_count * _buffer_size` bytes and reset the pointer to nullptr.
// NOTE(review): the guard tests against nullptr, while init() tests the mmap
// result against MAP_FAILED - confirm a failed mapping can never reach munmap.
// NOTE(review): braces and any further cleanup (e.g. closing descriptors) are
// not visible in this listing.
277 void MaliCounter::term()
279 if(_sample_data !=
nullptr)
281 munmap(_sample_data, _buffer_count * _buffer_size);
282 _sample_data =
nullptr;
// Issues the KBASE_HWCNT_READER_DUMP ioctl on the reader descriptor with
// argument 0. A non-zero result is the failure condition; its handling is not
// visible in this listing.
298 void MaliCounter::sample_counters()
300 if(ioctl(_hwc_fd, mali_userspace::KBASE_HWCNT_READER_DUMP, 0) != 0)
// Blocks until the reader descriptor reports an event, then copies the ready
// sample into `_raw_counter_buffer` and records its timestamp.
// NOTE(review): the declaration of `poll_fd`, the handling of `count`, and
// the bodies of the failure / POLLHUP branches are not visible in this listing.
306 void MaliCounter::wait_next_event()
309 poll_fd.fd = _hwc_fd;
310 poll_fd.events = POLLIN;
// Timeout of -1: wait indefinitely for a single descriptor.
312 const int count = poll(&poll_fd, 1, -1);
// Data ready: fetch the buffer metadata, copy the sample, hand the buffer back.
319 if((poll_fd.revents & POLLIN) != 0)
321 mali_userspace::kbase_hwcnt_reader_metadata meta;
323 if(ioctl(_hwc_fd,
static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_BUFFER), &meta) != 0)
// Source is slot `meta.buffer_idx` of the mapped region; exactly one buffer
// (`_buffer_size` bytes) is copied.
// NOTE(review): `meta.buffer_idx` is used without a visible range check.
328 memcpy(_raw_counter_buffer.data(), _sample_data + _buffer_size * meta.buffer_idx, _buffer_size);
329 _timestamp = meta.timestamp;
331 if(ioctl(_hwc_fd, mali_userspace::KBASE_HWCNT_READER_PUT_BUFFER, &meta) != 0)
// Hang-up on the descriptor is handled separately (body not visible).
336 else if((poll_fd.revents & POLLHUP) != 0)
// Returns a pointer to the start of the raw counter buffer.
342 const uint32_t *MaliCounter::get_counters()
const
344 return _raw_counter_buffer.data();
// Returns a pointer to the counters of one block inside the raw buffer.
// Offsets, in units of MALI_NAME_BLOCK_SIZE entries:
//   JM -> 0, TILER -> 1, MMU -> 2, and 3 + remapped core index for the last
//   return below.
// NOTE(review): the switch header, braces and the label guarding the final
// return are not visible in this listing; presumably it is the shader-core
// case - confirm.
347 const uint32_t *MaliCounter::get_counters(mali_userspace::MaliCounterBlockName block,
int core)
const
351 case mali_userspace::MALI_NAME_BLOCK_JM:
352 return _raw_counter_buffer.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 0;
353 case mali_userspace::MALI_NAME_BLOCK_MMU:
354 return _raw_counter_buffer.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 2;
355 case mali_userspace::MALI_NAME_BLOCK_TILER:
356 return _raw_counter_buffer.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 1;
// NOTE(review): `core` indexes `_core_index_remap` with no visible range
// check - confirm callers only pass indices below the number of cores.
363 return _raw_counter_buffer.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (3 + _core_index_remap[core]);
// Searches the name table of `block` for an entry containing `name`.
// The block's names start at offset MALI_NAME_BLOCK_SIZE * block within
// `_names_lut`, and MALI_NAME_BLOCK_SIZE entries are scanned in order.
// NOTE(review): matching uses strstr, i.e. `name` only has to occur as a
// substring of an entry. The value returned on a match and on no match is not
// visible in this listing.
367 int MaliCounter::find_counter_index_by_name(mali_userspace::MaliCounterBlockName block,
const char *
name)
369 const char *
const *names = &_names_lut[mali_userspace::MALI_NAME_BLOCK_SIZE * block];
371 for(
int i = 0; i < mali_userspace::MALI_NAME_BLOCK_SIZE; ++i)
373 if(strstr(names[i],
name) !=
nullptr)
// Records the current sample timestamp as the start of the measured interval.
// NOTE(review): the enclosing function signatures are not visible in this
// listing; this line and the lines below presumably belong to separate
// start/stop routines - confirm.
386 _start_time = _timestamp;
// Read GPU_ACTIVE from the job-manager block and store it as a Measurement,
// keeping the unit of the existing "GPU_ACTIVE" entry.
// NOTE(review): the looked-up index is used directly; no check for a
// "not found" result is visible.
394 const uint32_t *counter = get_counters(mali_userspace::MALI_NAME_BLOCK_JM);
395 _counters.at(
"GPU_ACTIVE") =
Measurement(counter[find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_JM,
"GPU_ACTIVE")], _counters.at(
"GPU_ACTIVE").unit());
// Resolve the shader-block indices once, outside the per-core loop.
397 const int arith_index = find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_SHADER,
"ARITH_WORDS");
398 const int ls_index = find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_SHADER,
"LS_ISSUE");
399 const int tex_index = find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_SHADER,
"TEX_ISSUE");
400 const int compute_index = find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_SHADER,
"COMPUTE_ACTIVE");
401 const int frag_index = find_counter_index_by_name(mali_userspace::MALI_NAME_BLOCK_SHADER,
"FRAG_ACTIVE");
// For every core, copy the five shader counters into the per-core table,
// keyed by core number.
404 for(uint32_t core = 0; core < _num_cores; ++core)
406 const uint32_t *sc_counter = get_counters(mali_userspace::MALI_NAME_BLOCK_SHADER, core);
408 _core_counters.at(
"ARITH_WORDS").values[core] = sc_counter[arith_index];
409 _core_counters.at(
"LS_ISSUE").values[core] = sc_counter[ls_index];
410 _core_counters.at(
"TEX_ISSUE").values[core] = sc_counter[tex_index];
411 _core_counters.at(
"COMPUTE_ACTIVE").values[core] = sc_counter[compute_index];
412 _core_counters.at(
"FRAG_ACTIVE").values[core] = sc_counter[frag_index];
// Records the current sample timestamp as the end of the measured interval.
415 _stop_time = _timestamp;
// Returns the fixed display string "Mali Counter".
// NOTE(review): the enclosing function signature is not visible in this listing.
420 return "Mali Counter";
// Builds the reported measurements.
// NOTE(review): the enclosing function signature, the container receiving
// the entries below, and the loop bodies are not visible in this listing.
//
// GPU_ACTIVE is divided by `_scale_factor`, read through `.v.floating_point`,
// and labelled with `_unit` prepended to the counter's own unit.
425 Measurement counters((_counters.at(
"GPU_ACTIVE").value() / _scale_factor).v.floating_point, _unit + _counters.at(
"GPU_ACTIVE").unit());
// "Timespan" is the difference between the stop and start timestamps, with unit "ns".
429 {
"Timespan",
Measurement(_stop_time - _start_time,
"ns") },
430 {
"GPU active", counters },
// Walk every per-core counter, then every (core, value) pair stored for it.
433 for(
const auto &counter : _core_counters)
435 for(
const auto &core : counter.second.values)