Compute Library
 21.05
hwc.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_TEST_HWC
25 #define ARM_COMPUTE_TEST_HWC
26 
27 #include "hwc_names.hpp"
28 
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <poll.h>
32 #include <stddef.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <unistd.h>
39 
40 #ifndef DOXYGEN_SKIP_THIS
41 
/* MALI_IOR / MALI_IOW: wrappers used below to build ioctl request numbers; the underlying macro depends on the target platform. */
42 #if defined(ANDROID) || defined(__ANDROID__)
43 /* Use _IOR_BAD/_IOW_BAD instead of _IOR/_IOW: with NDK-BUILD the plain macros fail to compile because _IOC_TYPECHECK is defined, not because the parameter is invalid. */
44 #define MALI_IOR(a, b, c) _IOR_BAD(a, b, c)
45 #define MALI_IOW(a, b, c) _IOW_BAD(a, b, c)
46 #else /* defined(ANDROID) || defined(__ANDROID__) */
47 #define MALI_IOR(a, b, c) _IOR(a, b, c)
48 #define MALI_IOW(a, b, c) _IOW(a, b, c)
49 #endif /* defined(ANDROID) || defined(__ANDROID__) */
50 
51 namespace mali_userspace
52 {
/* Header placed first in the UK call argument structs of this file (kbase_uk_gpuprops,
 * uku_version_check_args). mali_ioctl() reads `id` to build the request number and
 * inspects `ret` after the call; all three members share the same storage. */
53 union uk_header
54 {
55  uint32_t id; /* Read by mali_ioctl() as the function number of the request. */
56  uint32_t ret; /* Checked by mali_ioctl() after the call; non-zero is reported as failure. */
57  uint64_t sizer; /* Widens the union to 64 bits. */
58 };
59 
/* Length of mali_base_gpu_core_props::texture_features. */
60 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
/* Length of mali_base_gpu_coherent_group_info::group. */
61 #define BASE_MAX_COHERENT_GROUPS 16
62 
/* Core property record; embedded as `core_props` in mali_base_gpu_props.
 * NOTE(review): field order and widths presumably mirror the kernel driver's
 * definition and must not be changed - confirm against the driver headers. */
63 struct mali_base_gpu_core_props
64 {
65  uint32_t product_id;
66  uint16_t version_status;
67  uint16_t minor_revision;
68  uint16_t major_revision;
69  uint16_t padding; /* Explicit filler after the three 16-bit fields. */
70  uint32_t gpu_speed_mhz; /* Units per the field names (MHz / kHz) - TODO confirm. */
71  uint32_t gpu_freq_khz_max;
72  uint32_t gpu_freq_khz_min;
73  uint32_t log2_program_counter_size;
74  uint32_t texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
75  uint64_t gpu_available_memory_size;
76 };
77 
/* L2 cache property record; embedded as `l2_props` in mali_base_gpu_props. */
78 struct mali_base_gpu_l2_cache_props
79 {
80  uint8_t log2_line_size;
81  uint8_t log2_cache_size;
82  uint8_t num_l2_slices;
83  uint8_t padding[5]; /* Explicit filler: 3 data bytes + 5 padding bytes = 8 bytes. */
84 };
85 
/* Tiler property record (two 32-bit values); embedded as `tiler_props` in mali_base_gpu_props. */
86 struct mali_base_gpu_tiler_props
87 {
88  uint32_t bin_size_bytes;
89  uint32_t max_active_levels;
90 };
91 
/* Thread property record; embedded as `thread_props` in mali_base_gpu_props. */
92 struct mali_base_gpu_thread_props
93 {
94  uint32_t max_threads;
95  uint32_t max_workgroup_size;
96  uint32_t max_barrier_size;
97  uint16_t max_registers;
98  uint8_t max_task_queue;
99  uint8_t max_thread_group_split;
100  uint8_t impl_tech;
101  uint8_t padding[7]; /* Explicit filler: the 17 bytes of data above plus 7 gives 24 bytes. */
102 };
103 
/* One entry of mali_base_gpu_coherent_group_info::group. */
104 struct mali_base_gpu_coherent_group
105 {
106  uint64_t core_mask; /* Presumably one bit per core in the group - TODO confirm. */
107  uint16_t num_cores;
108  uint16_t padding[3]; /* Explicit filler: 8 + 2 + 6 = 16 bytes. */
109 };
110 
/* Coherency description: four 32-bit fields followed by a fixed array of
 * BASE_MAX_COHERENT_GROUPS group entries. Embedded as `coherency_info` in
 * mali_base_gpu_props. */
111 struct mali_base_gpu_coherent_group_info
112 {
113  uint32_t num_groups; /* Presumably the number of valid entries in `group` - TODO confirm. */
114  uint32_t num_core_groups;
115  uint32_t coherency;
116  uint32_t padding;
117  mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
118 };
119 
/* Length of gpu_raw_gpu_props::js_features. */
120 #define GPU_MAX_JOB_SLOTS 16
/* "Raw" property record; embedded as `raw_props` in mali_base_gpu_props.
 * NOTE(review): the fields look like unprocessed hardware register values -
 * confirm against the kernel driver headers. */
121 struct gpu_raw_gpu_props
122 {
123  uint64_t shader_present;
124  uint64_t tiler_present;
125  uint64_t l2_present;
126  uint64_t unused_1;
127 
128  uint32_t l2_features;
129  uint32_t suspend_size;
130  uint32_t mem_features;
131  uint32_t mmu_features;
132 
133  uint32_t as_present;
134 
135  uint32_t js_present;
136  uint32_t js_features[GPU_MAX_JOB_SLOTS];
137  uint32_t tiler_features;
138  uint32_t texture_features[3]; /* Literal 3 here; mali_base_gpu_core_props uses BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS (also 3). */
139 
140  uint32_t gpu_id;
141 
142  uint32_t thread_max_threads;
143  uint32_t thread_max_workgroup_size;
144  uint32_t thread_max_barrier_size;
145  uint32_t thread_features;
146 
147  uint32_t coherency_mode;
148 };
149 
/* Aggregate of all property records declared above; carried as the payload of kbase_uk_gpuprops. */
150 struct mali_base_gpu_props
151 {
152  mali_base_gpu_core_props core_props;
153  mali_base_gpu_l2_cache_props l2_props;
154  uint64_t unused; /* Placeholder slot between l2_props and tiler_props. */
155  mali_base_gpu_tiler_props tiler_props;
156  mali_base_gpu_thread_props thread_props;
157  gpu_raw_gpu_props raw_props;
158  mali_base_gpu_coherent_group_info coherency_info;
159 };
160 
/* UK call argument: header followed by the full property aggregate. It has the
 * `header` member that mali_ioctl() requires.
 * NOTE(review): presumably issued with header.id = KBASE_FUNC_GPU_PROPS_REG_DUMP - confirm at the call site. */
161 struct kbase_uk_gpuprops
162 {
163  uk_header header;
164  mali_base_gpu_props props;
165 };
166 
/* Codes for the width of a property value (U8..U64 encoded as 0..3).
 * Not referenced elsewhere in this header. */
#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)

/* Property identifiers used as the `type` key of gpu_property_mapping. */
#define KBASE_GPUPROP_PRODUCT_ID 1
#define KBASE_GPUPROP_MINOR_REVISION 3
#define KBASE_GPUPROP_MAJOR_REVISION 4

#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
#define KBASE_GPUPROP_COHERENCY_GROUP_15 79

/* Compact record holding only the properties listed in gpu_property_mapping. */
struct gpu_props
{
    uint32_t product_id;
    uint16_t minor_revision;
    uint16_t major_revision;
    uint32_t num_groups;
    uint32_t num_core_groups;
    uint64_t core_mask[16]; /* One slot per KBASE_GPUPROP_COHERENCY_GROUP_n (n = 0..15). */
};

/* Table entry for a scalar member: { property id, byte offset, byte size }. */
#define PROP(name, member)                                        \
    {                                                             \
        KBASE_GPUPROP_##name, offsetof(struct gpu_props, member), \
        sizeof(((struct gpu_props *)0)->member)                   \
    }
/* Table entry for element `idx` of an array member. The offset is scaled by
 * the element size and the recorded size is that of ONE element, so the entry
 * describes exactly member[idx]. (Previously the index was added as a raw
 * byte count and the size was that of the whole array.) */
#define PROP2(name, member, idx)                                     \
    {                                                                \
        KBASE_GPUPROP_##name,                                        \
        offsetof(struct gpu_props, member) +                         \
            (idx) * sizeof(((struct gpu_props *)0)->member[0]),      \
        sizeof(((struct gpu_props *)0)->member[0])                   \
    }

/* Maps a property id (`type`) to the location (`offset`, in bytes) and width
 * (`size`, in bytes) of the corresponding field inside struct gpu_props.
 * The list ends with an all-zero sentinel, so it can be walked until
 * `type == 0`. */
static const struct
{
    uint32_t type;
    size_t   offset;
    int      size;
} gpu_property_mapping[] =
{
    PROP(PRODUCT_ID, product_id),
    PROP(MINOR_REVISION, minor_revision),
    PROP(MAJOR_REVISION, major_revision),
    PROP(COHERENCY_NUM_GROUPS, num_groups),
    PROP(COHERENCY_NUM_CORE_GROUPS, num_core_groups),
    PROP2(COHERENCY_GROUP_0, core_mask, 0),
    PROP2(COHERENCY_GROUP_1, core_mask, 1),
    PROP2(COHERENCY_GROUP_2, core_mask, 2),
    PROP2(COHERENCY_GROUP_3, core_mask, 3),
    PROP2(COHERENCY_GROUP_4, core_mask, 4),
    PROP2(COHERENCY_GROUP_5, core_mask, 5),
    PROP2(COHERENCY_GROUP_6, core_mask, 6),
    PROP2(COHERENCY_GROUP_7, core_mask, 7),
    PROP2(COHERENCY_GROUP_8, core_mask, 8),
    PROP2(COHERENCY_GROUP_9, core_mask, 9),
    PROP2(COHERENCY_GROUP_10, core_mask, 10),
    PROP2(COHERENCY_GROUP_11, core_mask, 11),
    PROP2(COHERENCY_GROUP_12, core_mask, 12),
    PROP2(COHERENCY_GROUP_13, core_mask, 13),
    PROP2(COHERENCY_GROUP_14, core_mask, 14),
    PROP2(COHERENCY_GROUP_15, core_mask, 15),
    { 0, 0, 0 } /* sentinel */
};
#undef PROP
#undef PROP2
247 
/* Argument type named by the KBASE_HWCNT_READER_GET_BUFFER and
 * KBASE_HWCNT_READER_PUT_BUFFER request ids defined later in this file.
 * All members are zero-initialised by default. */
248 struct kbase_hwcnt_reader_metadata
249 {
250  uint64_t timestamp = 0;
251  uint32_t event_id = 0;
252  uint32_t buffer_idx = 0; /* Presumably identifies the sample buffer being handed over - TODO confirm. */
253 };
254 
/* Internal declarations: message layouts, ioctl request ids and small index
 * enums. Being in an unnamed namespace, each translation unit that includes
 * this header gets its own copy. */
255 namespace
256 {
257 /** Message header */
258 union kbase_uk_hwcnt_header
259 {
260  /* 32-bit number identifying the UK function to be called. */
261  uint32_t id;
262  /* The int return code returned by the called UK function. */
263  uint32_t ret;
264  /* Used to ensure 64-bit alignment of this union. Do not remove. */
265  uint64_t sizer;
266 };
267 
268 /** IOCTL parameters to check version */
269 struct kbase_uk_hwcnt_reader_version_check_args
270 {
271  union kbase_uk_hwcnt_header header;
272 
273  uint16_t major;
274  uint16_t minor;
275  uint8_t padding[4]; /* Explicit filler: 2 + 2 + 4 = 8 bytes after the header. */
276 };
277 
/* Pointer slot that is 64 bits wide whatever the native pointer width (`sizer` forces the size).
 * NOTE(review): `compat_value` is presumably for 32-bit callers - confirm. */
278 union kbase_pointer
279 {
280  void *value;
281  uint32_t compat_value;
282  uint64_t sizer;
283 };
284 
/* Argument type named by KBASE_IOCTL_GET_GPUPROPS below.
 * NOTE(review): `buffer`/`size` presumably describe a caller-owned output buffer - confirm against the driver. */
285 struct kbase_ioctl_get_gpuprops
286 {
287  kbase_pointer buffer;
288  uint32_t size;
289  uint32_t flags;
290 };
291 
/* ioctl type byte and request id for the get-gpuprops call (function number 3, encoded via MALI_IOW). */
292 #define KBASE_IOCTL_TYPE 0x80
293 #define KBASE_IOCTL_GET_GPUPROPS MALI_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
294 
295 /** IOCTL parameters to set flags */
296 struct kbase_uk_hwcnt_reader_set_flags
297 {
298  union kbase_uk_hwcnt_header header;
299 
300  uint32_t create_flags;
301  uint32_t padding;
302 };
303 
304 /** IOCTL parameters to configure reader */
305 struct kbase_uk_hwcnt_reader_setup
306 {
307  union kbase_uk_hwcnt_header header;
308 
309  /* IN */
310  uint32_t buffer_count;
311  uint32_t jm_bm; /* The *_bm fields are presumably counter-enable bitmasks per block - TODO confirm. */
312  uint32_t shader_bm;
313  uint32_t tiler_bm;
314  uint32_t mmu_l2_bm;
315 
316  /* OUT */
317  int32_t fd;
318 };
319 
/* NOTE(review): presumably the reader API version this code expects from
 * KBASE_HWCNT_READER_GET_API_VERSION - confirm at the call site. */
320 static const uint32_t HWCNT_READER_API = 1;
321 
/* Same fields as kbase_uk_hwcnt_reader_version_check_args, but using uk_header as the header type. */
322 struct uku_version_check_args
323 {
324  uk_header header;
325  uint16_t major;
326  uint16_t minor;
327  uint8_t padding[4];
328 };
329 
/* Function ids for the UK call interface and request ids for the reader interface. */
330 enum
331 {
332  UKP_FUNC_ID_CHECK_VERSION = 0,
333  /* Related to mali0 ioctl interface */
334  LINUX_UK_BASE_MAGIC = 0x80, /* ioctl type byte used by mali_ioctl(). */
335  BASE_CONTEXT_CREATE_KERNEL_FLAGS = 0x2,
336  KBASE_FUNC_HWCNT_UK_FUNC_ID = 512, /* Base value; the KBASE_FUNC_* ids below are offsets from it. */
337  KBASE_FUNC_GPU_PROPS_REG_DUMP = KBASE_FUNC_HWCNT_UK_FUNC_ID + 14,
338  KBASE_FUNC_HWCNT_READER_SETUP = KBASE_FUNC_HWCNT_UK_FUNC_ID + 36,
339  KBASE_FUNC_HWCNT_DUMP = KBASE_FUNC_HWCNT_UK_FUNC_ID + 11,
340  KBASE_FUNC_HWCNT_CLEAR = KBASE_FUNC_HWCNT_UK_FUNC_ID + 12,
341  KBASE_FUNC_SET_FLAGS = KBASE_FUNC_HWCNT_UK_FUNC_ID + 18,
342 
343  /* The ids of ioctl commands for the reader interface */
344  KBASE_HWCNT_READER = 0xBE, /* ioctl type byte shared by all reader requests below. */
345  KBASE_HWCNT_READER_GET_HWVER = MALI_IOR(KBASE_HWCNT_READER, 0x00, uint32_t),
346  KBASE_HWCNT_READER_GET_BUFFER_SIZE = MALI_IOR(KBASE_HWCNT_READER, 0x01, uint32_t),
347  KBASE_HWCNT_READER_DUMP = MALI_IOW(KBASE_HWCNT_READER, 0x10, uint32_t),
348  KBASE_HWCNT_READER_CLEAR = MALI_IOW(KBASE_HWCNT_READER, 0x11, uint32_t),
349  KBASE_HWCNT_READER_GET_BUFFER = MALI_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata),
350  KBASE_HWCNT_READER_PUT_BUFFER = MALI_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata),
351  KBASE_HWCNT_READER_SET_INTERVAL = MALI_IOW(KBASE_HWCNT_READER, 0x30, uint32_t),
352  KBASE_HWCNT_READER_ENABLE_EVENT = MALI_IOW(KBASE_HWCNT_READER, 0x40, uint32_t),
353  KBASE_HWCNT_READER_DISABLE_EVENT = MALI_IOW(KBASE_HWCNT_READER, 0x41, uint32_t),
/* NOTE(review): a "GET" request encoded with MALI_IOW, unlike the other GET_* ids which use MALI_IOR;
 * presumably this matches the driver's own definition - confirm before changing. */
354  KBASE_HWCNT_READER_GET_API_VERSION = MALI_IOW(KBASE_HWCNT_READER, 0xFF, uint32_t)
355 
356 };
357 
/* Indices into a two-element pipe descriptor array. */
358 enum
359 {
360  PIPE_DESCRIPTOR_IN, /**< The index of a pipe's input descriptor. */
361  PIPE_DESCRIPTOR_OUT, /**< The index of a pipe's output descriptor. */
362 
363  PIPE_DESCRIPTOR_COUNT /**< The number of descriptors forming a pipe. */
364 };
365 
/* Indices into the array of descriptors passed to poll. */
366 enum
367 {
368  POLL_DESCRIPTOR_SIGNAL, /**< The index of the signal descriptor in poll fds array. */
369  POLL_DESCRIPTOR_HWCNT_READER, /**< The index of the hwcnt reader descriptor in poll fds array. */
370 
371  POLL_DESCRIPTOR_COUNT /**< The number of descriptors poll is waiting for. */
372 };
373 
374 /** Write a single byte into the pipe to interrupt the reader thread */
375 typedef char poll_data_t;
376 }
377 
378 template <typename T>
379 static inline int mali_ioctl(int fd, T &arg)
380 {
381  auto *hdr = &arg.header;
382  const int cmd = _IOC(_IOC_READ | _IOC_WRITE, LINUX_UK_BASE_MAGIC, hdr->id, sizeof(T));
383 
384  if(ioctl(fd, cmd, &arg))
385  return -1;
386  if(hdr->ret)
387  return -1;
388 
389  return 0;
390 }
391 } // namespace mali_userspace
392 
393 #endif /* DOXYGEN_SKIP_THIS */
394 
395 #endif /* ARM_COMPUTE_TEST_HWC */
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:861
unsigned core_mask
Definition: MaliCounter.cpp:42
decltype(strategy::transforms) typedef type
unsigned gpu_id
Definition: MaliCounter.cpp:39
const unsigned int num_groups
Definition: Im2Col.cpp:153
void header(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:481