Compute Library
 21.02
helpers.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_HELPER_H
25 #define ARM_COMPUTE_HELPER_H
26 
27 #include "load_store_utility.h"
28 
29 #if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
30 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
31 #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
32 
33 #if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
34 #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
35 #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
36 
37 #if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
38 #pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
39 #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
40 
41 #if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
42 #pragma OPENCL EXTENSION cl_arm_printf : enable
43 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
44 
45 #define GPU_ARCH_MIDGARD 0x100
46 #define GPU_ARCH_BIFROST 0x200
47 
48 /** Concatenate two inputs.
49  *
50  * @param[in] a The first input to be concatenated
51  * @param[in] b The second input to be concatenated
52  *
53  * @return The concatenated output
54  */
55 #define CONCAT(a, b) a##b
56 
57 /** Expand the given vector
58  *
59  * @param[in] x The vector to be expanded
60  *
61  * @return The expanded output
62  */
63 #define EXPAND(x) x
64 
65 /** Clamp the given value between an upper and lower bound.
66  *
67  * @param[in] x The value to be clamped
68  * @param[in] min_val The lower bound
69  * @param[in] max_val The upper bound
70  *
71  * @return The clamped value.
72  */
73 #define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
74 
75 /** REVn reverses the given vector whose size is n.
76  * @name REVn
77  *
78  * @param[in] x The vector to be reversed
79  *
80  * @return The reversed vector
81  * @{
82  */
83 #define REV1(x) ((x))
84 #define REV2(x) ((x).s10)
85 #define REV3(x) ((x).s210)
86 #define REV4(x) ((x).s3210)
87 #define REV8(x) ((x).s76543210)
88 #define REV16(x) ((x).sFEDCBA9876543210)
89 /** @} */ // end of group REVn
90 
91 /** Reverse the given vector.
92  * @name REVERSE
93  *
94  * @param[in] x The vector to be reversed
95  * @param[in] s The size of the vector
96  *
97  * @return The reversed vector
98  * @{
99  */
100 #define REVERSE_STR(x, s) REV##s((x))
101 #define REVERSE(x, s) REVERSE_STR(x, s)
102 /** @} */ // end of group REVERSE
103 
104 /** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
105  * @name ROTs_n
106  *
107  * @param[in] x The vector to be shifted
108  *
109  * @return The shifted vector
110  * @{
111  */
112 #define ROT1_0(x) ((x))
113 #define ROT1_1(x) ((x))
114 
115 #define ROT2_0(x) ((x))
116 #define ROT2_1(x) ((x).s10)
117 #define ROT2_2(x) ((x))
118 
119 #define ROT3_0(x) ((x))
120 #define ROT3_1(x) ((x).s201)
121 #define ROT3_2(x) ((x).s120)
122 #define ROT3_3(x) ((x))
123 
124 #define ROT4_0(x) ((x))
125 #define ROT4_1(x) ((x).s3012)
126 #define ROT4_2(x) ((x).s2301)
127 #define ROT4_3(x) ((x).s1230)
128 #define ROT4_4(x) ((x))
129 
130 #define ROT8_0(x) ((x))
131 #define ROT8_1(x) ((x).s70123456)
132 #define ROT8_2(x) ((x).s67012345)
133 #define ROT8_3(x) ((x).s56701234)
134 #define ROT8_4(x) ((x).s45670123)
135 #define ROT8_5(x) ((x).s34567012)
136 #define ROT8_6(x) ((x).s23456701)
137 #define ROT8_7(x) ((x).s12345670)
138 #define ROT8_8(x) ((x))
139 
140 #define ROT16_0(x) ((x))
141 #define ROT16_1(x) ((x).sF0123456789ABCDE)
142 #define ROT16_2(x) ((x).sEF0123456789ABCD)
143 #define ROT16_3(x) ((x).sDEF0123456789ABC)
144 #define ROT16_4(x) ((x).sCDEF0123456789AB)
145 #define ROT16_5(x) ((x).sBCDEF0123456789A)
146 #define ROT16_6(x) ((x).sABCDEF0123456789)
147 #define ROT16_7(x) ((x).s9ABCDEF012345678)
148 #define ROT16_8(x) ((x).s89ABCDEF01234567)
149 #define ROT16_9(x) ((x).s789ABCDEF0123456)
150 #define ROT16_10(x) ((x).s6789ABCDEF012345)
151 #define ROT16_11(x) ((x).s56789ABCDEF01234)
152 #define ROT16_12(x) ((x).s456789ABCDEF0123)
153 #define ROT16_13(x) ((x).s3456789ABCDEF012)
154 #define ROT16_14(x) ((x).s23456789ABCDEF01)
155 #define ROT16_15(x) ((x).s123456789ABCDEF0)
156 #define ROT16_16(x) ((x))
157 /** @} */ // end of group ROTs_n
158 
159 /** Circular-right-shift (rotate-right) the given vector by the given amount.
160  * @name ROTATE
161  *
162  * @param[in] x The vector to be shifted
163  * @param[in] s The size of the vector
164  * @param[in] n The amount to be shifted
165  *
166  * @return The shifted vector
167  * @{
168  */
169 #define ROTATE_STR(x, s, n) ROT##s##_##n(x)
170 #define ROTATE(x, s, n) ROTATE_STR(x, s, n)
171 /** @} */ // end of group ROTATE
172 
173 /** Creates a vector of size n filled with offset values corresponding to the location of each element.
174  * @name V_OFFSn
175  *
176  * @param[in] dt The data type of the output vector
177  *
178  * @return The vector filled with offset values
179  * @{
180  */
181 #define V_OFFS1(dt) (dt##1)(0)
182 #define V_OFFS2(dt) (dt##2)(0, 1)
183 #define V_OFFS3(dt) (dt##3)(0, 1, 2)
184 #define V_OFFS4(dt) (dt##4)(0, 1, 2, 3)
185 #define V_OFFS8(dt) (dt##8)(0, 1, 2, 3, 4, 5, 6, 7)
186 #define V_OFFS16(dt) (dt##16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
187 /** @} */ // end of group V_OFFSn
188 
189 /** Create a vector filled with offset values corresponding to the location of each element.
190  * @name VEC_OFFS
191  *
192  * @param[in] dt The data type of the output vector
193  * @param[in] s The size of the output vector
194  *
195  * @return The vector filled with offset values
196  * @{
197  */
198 #define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
199 #define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
200 /** @} */ // end of group VEC_OFFS
201 
202 #define VLOAD_STR(size) vload##size
203 #define VLOAD(size) VLOAD_STR(size)
204 
205 #define PIXEL_UNIT4 1
206 #define PIXEL_UNIT8 2
207 #define PIXEL_UNIT16 4
208 
209 /** Utility macro to convert a vector size in pixel unit.
210  *
211  * @name CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
212  *
213  * @param[in] vec_size Vector size. Only 4,8 and 16 is supported
214  *
215  * @return The pixel unit (number of pixels)
216  * @{
217  */
218 #define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size) PIXEL_UNIT##vec_size
219 #define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(vec_size) CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size)
220 /** @} */ // end of group CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
221 
222 #define read_image2d_floatx1(img, x_coord, y_coord) (float4)(read_imagef(img, (int2)(x_coord, y_coord)));
223 #define read_image2d_floatx2(img, x_coord, y_coord) (float8)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)));
224 #define read_image2d_floatx4(img, x_coord, y_coord) (float16)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)), read_imagef(img, (int2)(x_coord + 2, y_coord)), read_imagef(img, (int2)(x_coord + 3, y_coord)));
225 
226 #if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
227 #define read_image2d_halfx1(img, x_coord, y_coord) (half4)(read_imageh(img, (int2)(x_coord, y_coord)));
228 #define read_image2d_halfx2(img, x_coord, y_coord) (half8)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)));
229 #define read_image2d_halfx4(img, x_coord, y_coord) (half16)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)), read_imageh(img, (int2)(x_coord + 2, y_coord)), read_imageh(img, (int2)(x_coord + 3, y_coord)));
230 #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
231 
232 /** Utility macro to read a 2D OpenCL image object.
233  *
234  * @note Coordinates are not normalized
235  *
236  * @param[in] data_type Data type
237  * @param[in] n0 Number of pixel to read. Only 1,2 and 4 is supported
238  * @param[in] img OpenCL image object
239  * @param[in] x_coord The x coordinate for the top-left pixel
240  * @param[in] y_coord The y coordinate for the top-left pixel
241  *
242  * @return Pixels from the 2D OpenCL image object
243  * @{
244  */
245 #define READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord) read_image2d_##data_type##x##n0(img, x_coord, y_coord)
246 #define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord) READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)
247 
248 #define VSTORE_STR(size) vstore##size
249 #define VSTORE(size) VSTORE_STR(size)
250 
251 #define float1 float
252 #define half1 half
253 #define char1 char
254 #define uchar1 uchar
255 #define short1 short
256 #define ushort1 ushort
257 #define int1 int
258 #define uint1 uint
259 #define long1 long
260 #define ulong1 ulong
261 #define double1 double
262 
263 #define vload1(OFFSET, PTR) *(OFFSET + PTR)
264 #define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
265 
266 /** Extended partial vstore that correctly handles scalar values as well.
267  * Store the **lower** 0 to (n-1)th elements of the given vector while minimising the amount of vstore ops
268  * @name VSTORE_PARTIAL
269  *
270  * @note With this macro, the passed data can be both a vector and a scalar
271  * @note @p store_size needs to be <= @p size
272  * eg 1: Valid
273  * VSTORE_PARTIAL(16, 15) ...;
274  * eg 2: Invalid
275  * VSTORE_PARTIAL(4, 7) ...;
276  *
277  * @param[in] size The width of @p DATA. Supported values: 1(scalar), 2, 3, 4, 8, 16
278  * @param[in] store_size The number of lower elements to store. Supported values: 1-16, but has to be <= @p size
279  * @{
280  */
281 #define VSTORE_PARTIAL_STR(size, store_size) vstore_partial_##size##_##store_size
282 #define VSTORE_PARTIAL(size, store_size) VSTORE_PARTIAL_STR(size, store_size)
283 
284 #define NO_STORE(data, offs, ptr) \
285  { \
286  }
287 
288 // Size == 1 (scalar)
289 #define vstore_partial_1_0 NO_STORE
290 #define vstore_partial_1_1 vstore1
291 #define vstore_partial_1_2 NO_STORE
292 #define vstore_partial_1_3 NO_STORE
293 #define vstore_partial_1_4 NO_STORE
294 #define vstore_partial_1_5 NO_STORE
295 #define vstore_partial_1_6 NO_STORE
296 #define vstore_partial_1_7 NO_STORE
297 #define vstore_partial_1_8 NO_STORE
298 #define vstore_partial_1_9 NO_STORE
299 #define vstore_partial_1_10 NO_STORE
300 #define vstore_partial_1_11 NO_STORE
301 #define vstore_partial_1_12 NO_STORE
302 #define vstore_partial_1_13 NO_STORE
303 #define vstore_partial_1_14 NO_STORE
304 #define vstore_partial_1_15 NO_STORE
305 #define vstore_partial_1_16 NO_STORE
306 // Size == 2
307 #define vstore_partial_2_0 NO_STORE
308 #define vstore_partial_2_1 vstore_partial_1
309 #define vstore_partial_2_2 vstore_partial_2
310 #define vstore_partial_2_3 NO_STORE
311 #define vstore_partial_2_4 NO_STORE
312 #define vstore_partial_2_5 NO_STORE
313 #define vstore_partial_2_6 NO_STORE
314 #define vstore_partial_2_7 NO_STORE
315 #define vstore_partial_2_8 NO_STORE
316 #define vstore_partial_2_9 NO_STORE
317 #define vstore_partial_2_10 NO_STORE
318 #define vstore_partial_2_11 NO_STORE
319 #define vstore_partial_2_12 NO_STORE
320 #define vstore_partial_2_13 NO_STORE
321 #define vstore_partial_2_14 NO_STORE
322 #define vstore_partial_2_15 NO_STORE
323 #define vstore_partial_2_16 NO_STORE
324 // Size == 3
325 #define vstore_partial_3_0 NO_STORE
326 #define vstore_partial_3_1 vstore_partial_1
327 #define vstore_partial_3_2 vstore_partial_2
328 #define vstore_partial_3_3 vstore_partial_3
329 #define vstore_partial_3_4 NO_STORE
330 #define vstore_partial_3_5 NO_STORE
331 #define vstore_partial_3_6 NO_STORE
332 #define vstore_partial_3_7 NO_STORE
333 #define vstore_partial_3_8 NO_STORE
334 #define vstore_partial_3_9 NO_STORE
335 #define vstore_partial_3_10 NO_STORE
336 #define vstore_partial_3_11 NO_STORE
337 #define vstore_partial_3_12 NO_STORE
338 #define vstore_partial_3_13 NO_STORE
339 #define vstore_partial_3_14 NO_STORE
340 #define vstore_partial_3_15 NO_STORE
341 #define vstore_partial_3_16 NO_STORE
342 // Size == 4
343 #define vstore_partial_4_0 NO_STORE
344 #define vstore_partial_4_1 vstore_partial_1
345 #define vstore_partial_4_2 vstore_partial_2
346 #define vstore_partial_4_3 vstore_partial_3
347 #define vstore_partial_4_4 vstore_partial_4
348 #define vstore_partial_4_5 NO_STORE
349 #define vstore_partial_4_6 NO_STORE
350 #define vstore_partial_4_7 NO_STORE
351 #define vstore_partial_4_8 NO_STORE
352 #define vstore_partial_4_9 NO_STORE
353 #define vstore_partial_4_10 NO_STORE
354 #define vstore_partial_4_11 NO_STORE
355 #define vstore_partial_4_12 NO_STORE
356 #define vstore_partial_4_13 NO_STORE
357 #define vstore_partial_4_14 NO_STORE
358 #define vstore_partial_4_15 NO_STORE
359 #define vstore_partial_4_16 NO_STORE
360 // Size == 8
361 #define vstore_partial_8_0 NO_STORE
362 #define vstore_partial_8_1 vstore_partial_1
363 #define vstore_partial_8_2 vstore_partial_2
364 #define vstore_partial_8_3 vstore_partial_3
365 #define vstore_partial_8_4 vstore_partial_4
366 #define vstore_partial_8_5 vstore_partial_5
367 #define vstore_partial_8_6 vstore_partial_6
368 #define vstore_partial_8_7 vstore_partial_7
369 #define vstore_partial_8_8 vstore_partial_8
370 #define vstore_partial_8_9 NO_STORE
371 #define vstore_partial_8_10 NO_STORE
372 #define vstore_partial_8_11 NO_STORE
373 #define vstore_partial_8_12 NO_STORE
374 #define vstore_partial_8_13 NO_STORE
375 #define vstore_partial_8_14 NO_STORE
376 #define vstore_partial_8_15 NO_STORE
377 #define vstore_partial_8_16 NO_STORE
378 // Size == 16
379 #define vstore_partial_16_0 NO_STORE
380 #define vstore_partial_16_1 vstore_partial_1
381 #define vstore_partial_16_2 vstore_partial_2
382 #define vstore_partial_16_3 vstore_partial_3
383 #define vstore_partial_16_4 vstore_partial_4
384 #define vstore_partial_16_5 vstore_partial_5
385 #define vstore_partial_16_6 vstore_partial_6
386 #define vstore_partial_16_7 vstore_partial_7
387 #define vstore_partial_16_8 vstore_partial_8
388 #define vstore_partial_16_9 vstore_partial_9
389 #define vstore_partial_16_10 vstore_partial_10
390 #define vstore_partial_16_11 vstore_partial_11
391 #define vstore_partial_16_12 vstore_partial_12
392 #define vstore_partial_16_13 vstore_partial_13
393 #define vstore_partial_16_14 vstore_partial_14
394 #define vstore_partial_16_15 vstore_partial_15
395 #define vstore_partial_16_16 vstore_partial_16
396 
397 /** Partial vstore. Store the **lower** 0 to (n-1)th elements of the given vector while minimising the amount of vstore ops
398  * @name vstore_partial_n
399  *
400  * @note @p DATA needs to be a vector not a scalar
401  * @note n needs to be <= the vector width of the input variable @p DATA
402  * eg 1: Valid
403  * vstore_partial_15(var:float16, 0, 0xabcd);
404  * eg 2: Invalid
405  * vstore_partial_7(var:float4, 0, 0xabcd);
406  *
407  * @note in cases n == 1, 2, 3, 4, 8, 16, no extra vstore is invoked, thus there's no performance penalty.
408  *
409  * @param[in] DATA The name of the variable
410  * @param[in] OFFSET Offset in n
411  * @param[in] PTR The base pointer
412  * @{
413  */
414 #define vstore_partial_1(DATA, OFFSET, PTR) \
415  vstore1(DATA.s0, OFFSET, PTR);
416 
417 #define vstore_partial_2(DATA, OFFSET, PTR) \
418  vstore2(DATA.s01, OFFSET, PTR);
419 
420 #define vstore_partial_3(DATA, OFFSET, PTR) \
421  vstore3(DATA.s012, OFFSET, PTR);
422 
423 #define vstore_partial_4(DATA, OFFSET, PTR) \
424  vstore4(DATA.s0123, OFFSET, PTR);
425 
426 #define vstore_partial_5(DATA, OFFSET, PTR) \
427  vstore_partial_4(DATA.s0123, OFFSET, PTR); \
428  vstore1(DATA.s4, OFFSET, PTR + 4);
429 
430 #define vstore_partial_6(DATA, OFFSET, PTR) \
431  vstore_partial_4(DATA.s0123, OFFSET, PTR); \
432  vstore_partial_2(DATA.s45, OFFSET, PTR + 4);
433 
434 #define vstore_partial_7(DATA, OFFSET, PTR) \
435  vstore_partial_4(DATA.s0123, OFFSET, PTR); \
436  vstore_partial_3(DATA.s456, OFFSET, PTR + 4);
437 
438 #define vstore_partial_8(DATA, OFFSET, PTR) \
439  vstore8(DATA.s01234567, OFFSET, PTR);
440 
441 #define vstore_partial_9(DATA, OFFSET, PTR) \
442  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
443  vstore1(DATA.s8, OFFSET, PTR + 8);
444 
445 #define vstore_partial_10(DATA, OFFSET, PTR) \
446  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
447  vstore_partial_2(DATA.s89, OFFSET, PTR + 8);
448 
449 #define vstore_partial_11(DATA, OFFSET, PTR) \
450  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
451  vstore_partial_3(DATA.s89a, OFFSET, PTR + 8);
452 
453 #define vstore_partial_12(DATA, OFFSET, PTR) \
454  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
455  vstore_partial_4(DATA.s89ab, OFFSET, PTR + 8);
456 
457 #define vstore_partial_13(DATA, OFFSET, PTR) \
458  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
459  vstore_partial_5(DATA.s89abcdef, OFFSET, PTR + 8);
460 
461 #define vstore_partial_14(DATA, OFFSET, PTR) \
462  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
463  vstore_partial_6(DATA.s89abcdef, OFFSET, PTR + 8);
464 
465 #define vstore_partial_15(DATA, OFFSET, PTR) \
466  vstore_partial_8(DATA.s01234567, OFFSET, PTR); \
467  vstore_partial_7(DATA.s89abcdef, OFFSET, PTR + 8);
468 
469 #define vstore_partial_16(DATA, OFFSET, PTR) \
470  vstore16(DATA, OFFSET, PTR);
471 /** @} */ // end of groupd vstore_partial_n
472 /** @} */ // end of groupd VSTORE_PARTIAL
473 
474 // Convert built-in functions with _sat modifier are not supported in floating point so we create defines
475 // without _sat to overcome this issue
476 #define convert_float_sat convert_float
477 #define convert_float1_sat convert_float
478 #define convert_float2_sat convert_float2
479 #define convert_float3_sat convert_float3
480 #define convert_float4_sat convert_float4
481 #define convert_float8_sat convert_float8
482 #define convert_float16_sat convert_float16
483 #define convert_half_sat convert_float
484 #define convert_half1_sat convert_half
485 #define convert_half2_sat convert_half2
486 #define convert_half3_sat convert_half3
487 #define convert_half4_sat convert_half4
488 #define convert_half8_sat convert_half8
489 #define convert_half16_sat convert_half16
490 
491 #define convert_float1 convert_float
492 #define convert_half1 convert_half
493 #define convert_char1 convert_char
494 #define convert_uchar1 convert_uchar
495 #define convert_short1 convert_short
496 #define convert_ushort1 convert_ushort
497 #define convert_int1 convert_int
498 #define convert_uint1 convert_uint
499 #define convert_long1 convert_long
500 #define convert_ulong1 convert_ulong
501 #define convert_double1 convert_double
502 
503 #define convert_char1_sat convert_char_sat
504 #define convert_uchar1_sat convert_uchar_sat
505 #define convert_short1_sat convert_short_sat
506 #define convert_ushort1_sat convert_ushort_sat
507 #define convert_int1_sat convert_int_sat
508 #define convert_uint1_sat convert_uint_sat
509 #define convert_long1_sat convert_long_sat
510 #define convert_ulong1_sat convert_ulong_sat
511 #define convert_double1_sat convert_double_sat
512 
513 #define VEC_DATA_TYPE_STR(type, size) type##size
514 #define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
515 
516 #define CONVERT_STR(x, type) (convert_##type((x)))
517 #define CONVERT(x, type) CONVERT_STR(x, type)
518 
519 #define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
520 #define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
521 
522 #define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
523 #define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
524 
525 #define select_vec_dt_uchar(size) uchar##size
526 #define select_vec_dt_char(size) char##size
527 #define select_vec_dt_ushort(size) ushort##size
528 #define select_vec_dt_short(size) short##size
529 #define select_vec_dt_half(size) short##size
530 #define select_vec_dt_uint(size) uint##size
531 #define select_vec_dt_int(size) int##size
532 #define select_vec_dt_float(size) int##size
533 #define select_vec_dt_ulong(size) ulong##size
534 #define select_vec_dt_long(size) long##size
535 
536 #define SELECT_VEC_DATA_TYPE_STR(type, size) select_vec_dt_##type(size)
537 #define SELECT_VEC_DATA_TYPE(type, size) SELECT_VEC_DATA_TYPE_STR(type, size)
538 #define SELECT_DATA_TYPE(type) SELECT_VEC_DATA_TYPE_STR(type, 1)
539 
540 #define signed_int_vec_dt_uchar(size) char##size
541 #define signed_int_vec_dt_char(size) char##size
542 #define signed_int_vec_dt_ushort(size) short##size
543 #define signed_int_vec_dt_short(size) short##size
544 #define signed_int_vec_dt_half(size) short##size
545 #define signed_int_vec_dt_uint(size) int##size
546 #define signed_int_vec_dt_int(size) int##size
547 #define signed_int_vec_dt_float(size) int##size
548 #define signed_int_vec_dt_ulong(size) long##size
549 #define signed_int_vec_dt_long(size) long##size
550 
551 #define SIGNED_INT_VEC_DATA_TYPE_STR(type, size) signed_int_vec_dt_##type(size)
552 #define SIGNED_INT_VEC_DATA_TYPE(type, size) SIGNED_INT_VEC_DATA_TYPE_STR(type, size)
553 #define SIGNED_INT_DATA_TYPE(type) SIGNED_INT_VEC_DATA_TYPE_STR(type, 1)
554 
555 #define sum_reduce_1(x) (x)
556 #define sum_reduce_2(x) ((x).s0) + ((x).s1)
557 #define sum_reduce_3(x) sum_reduce_2((x).s01) + ((x).s2)
558 #define sum_reduce_4(x) sum_reduce_2((x).s01) + sum_reduce_2((x).s23)
559 #define sum_reduce_8(x) sum_reduce_4((x).s0123) + sum_reduce_4((x).s4567)
560 #define sum_reduce_16(x) sum_reduce_8((x).s01234567) + sum_reduce_8((x).s89ABCDEF)
561 
562 #define SUM_REDUCE_STR(x, size) sum_reduce_##size(x)
563 #define SUM_REDUCE(x, size) SUM_REDUCE_STR(x, size)
564 
565 #define max_reduce_1(x) (x)
566 #define max_reduce_2(x) max(((x).s0), ((x).s1))
567 #define max_reduce_3(x) max(max_reduce_2((x).s01), ((x).s2))
568 #define max_reduce_4(x) max(max_reduce_2((x).s01), max_reduce_2((x).s23))
569 #define max_reduce_8(x) max(max_reduce_4((x).s0123), max_reduce_4((x).s4567))
570 #define max_reduce_16(x) max(max_reduce_8((x).s01234567), max_reduce_8((x).s89ABCDEF))
571 
572 #define MAX_REDUCE_STR(x, size) max_reduce_##size(x)
573 #define MAX_REDUCE(x, size) MAX_REDUCE_STR(x, size)
574 
575 #define VECTOR_DECLARATION(name) \
576  __global uchar *name##_ptr, \
577  uint name##_stride_x, \
578  uint name##_step_x, \
579  uint name##_offset_first_element_in_bytes
580 
581 #define IMAGE_DECLARATION(name) \
582  __global uchar *name##_ptr, \
583  uint name##_stride_x, \
584  uint name##_step_x, \
585  uint name##_stride_y, \
586  uint name##_step_y, \
587  uint name##_offset_first_element_in_bytes
588 
589 #define TENSOR3D_DECLARATION(name) \
590  __global uchar *name##_ptr, \
591  uint name##_stride_x, \
592  uint name##_step_x, \
593  uint name##_stride_y, \
594  uint name##_step_y, \
595  uint name##_stride_z, \
596  uint name##_step_z, \
597  uint name##_offset_first_element_in_bytes
598 
599 #define TENSOR4D_DECLARATION(name) \
600  __global uchar *name##_ptr, \
601  uint name##_stride_x, \
602  uint name##_step_x, \
603  uint name##_stride_y, \
604  uint name##_step_y, \
605  uint name##_stride_z, \
606  uint name##_step_z, \
607  uint name##_stride_w, \
608  uint name##_step_w, \
609  uint name##_offset_first_element_in_bytes
610 
611 #define CONVERT_TO_VECTOR_STRUCT(name) \
612  update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
613 
614 #define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
615  update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
616 
617 #define CONVERT_TO_IMAGE_STRUCT(name) \
618  update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
619 
620 #define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
621  update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
622 
623 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
624  update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
625 
626 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
627  update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
628 
629 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
630  update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
631 
632 #define CONVERT_TO_TENSOR3D_STRUCT(name) \
633  update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
634  name##_stride_z, name##_step_z)
635 
636 #define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
637  update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
638 
639 #define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
640  update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
641  name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
642 
643 #define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
644  update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
645 
646 #define CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR(name) \
647  tensor3D_ptr_no_update(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
648  name##_stride_z, name##_step_z)
649 
650 /** Structure to hold Vector information */
651 typedef struct Vector
652 {
653  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
654  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
655  int stride_x; /**< Stride of the image in X dimension (in bytes) */
656 } Vector;
657 
658 /** Structure to hold Image information */
659 typedef struct Image
660 {
661  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
662  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
663  int stride_x; /**< Stride of the image in X dimension (in bytes) */
664  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
665 } Image;
666 
667 /** Structure to hold 3D tensor information */
668 typedef struct Tensor3D
669 {
670  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
671  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
672  int stride_x; /**< Stride of the image in X dimension (in bytes) */
673  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
674  int stride_z; /**< Stride of the image in Z dimension (in bytes) */
675 } Tensor3D;
676 
677 /** Structure to hold 4D tensor information */
678 typedef struct Tensor4D
679 {
680  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
681  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
682  int stride_x; /**< Stride of the image in X dimension (in bytes) */
683  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
684  int stride_z; /**< Stride of the image in Z dimension (in bytes) */
685  int stride_w; /**< Stride of the image in W dimension (in bytes) */
686 } Tensor4D;
687 
688 /** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
689  *
690  * @param[in] ptr Pointer to the starting postion of the buffer
691  * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
692  * @param[in] stride_x Stride of the vector in X dimension (in bytes)
693  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
694  *
695  * @return An image object
696  */
697 inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
698 {
699  Vector vector =
700  {
701  .ptr = ptr,
702  .offset_first_element_in_bytes = offset_first_element_in_bytes,
703  .stride_x = stride_x,
704  };
705  vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
706  return vector;
707 }
708 
709 /** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
710  *
711  * @param[in] ptr Pointer to the starting postion of the buffer
712  * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
713  * @param[in] stride_x Stride of the image in X dimension (in bytes)
714  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
715  * @param[in] stride_y Stride of the image in Y dimension (in bytes)
716  * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
717  *
718  * @return An image object
719  */
720 inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
721 {
722  Image img =
723  {
724  .ptr = ptr,
725  .offset_first_element_in_bytes = offset_first_element_in_bytes,
726  .stride_x = stride_x,
727  .stride_y = stride_y
728  };
729  img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
730  return img;
731 }
732 
733 /** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
734  *
735  * @param[in] ptr Pointer to the starting postion of the buffer
736  * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
737  * @param[in] stride_x Stride of the image in X dimension (in bytes)
738  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
739  * @param[in] stride_y Stride of the image in Y dimension (in bytes)
740  * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
741  * @param[in] stride_z Stride of the image in Z dimension (in bytes)
742  * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
743  *
744  * @return A 3D tensor object
745  */
746 inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
747 {
748  Image img =
749  {
750  .ptr = ptr,
751  .offset_first_element_in_bytes = offset_first_element_in_bytes,
752  .stride_x = stride_x,
753  .stride_y = stride_y
754  };
755  img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
756  return img;
757 }
758 
759 /** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
760  *
761  * @param[in] ptr Pointer to the starting postion of the buffer
762  * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
763  * @param[in] stride_x Stride of the image in X dimension (in bytes)
764  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
765  * @param[in] stride_y Stride of the image in Y dimension (in bytes)
766  * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
767  * @param[in] stride_z Stride of the image in Z dimension (in bytes)
768  * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
769  *
770  * @return A 3D tensor object
771  */
772 inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
773 {
774  Tensor3D tensor =
775  {
776  .ptr = ptr,
777  .offset_first_element_in_bytes = offset_first_element_in_bytes,
778  .stride_x = stride_x,
779  .stride_y = stride_y,
780  .stride_z = stride_z
781  };
782  tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
783  return tensor;
784 }
785 
786 /** Wrap 3D tensor information into an tensor structure.
787  *
788  * @param[in] ptr Pointer to the starting postion of the buffer
789  * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
790  * @param[in] stride_x Stride of the image in X dimension (in bytes)
791  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
792  * @param[in] stride_y Stride of the image in Y dimension (in bytes)
793  * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
794  * @param[in] stride_z Stride of the image in Z dimension (in bytes)
795  * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
796  *
797  * @return A 3D tensor object
798  */
799 inline Tensor3D tensor3D_ptr_no_update(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
800 {
801  Tensor3D tensor =
802  {
803  .ptr = ptr,
804  .offset_first_element_in_bytes = offset_first_element_in_bytes,
805  .stride_x = stride_x,
806  .stride_y = stride_y,
807  .stride_z = stride_z
808  };
809  return tensor;
810 }
811 
812 inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
813  uint step_w,
814  uint mod_size)
815 {
816  Tensor4D tensor =
817  {
818  .ptr = ptr,
819  .offset_first_element_in_bytes = offset_first_element_in_bytes,
820  .stride_x = stride_x,
821  .stride_y = stride_y,
822  .stride_z = stride_z,
823  .stride_w = stride_w
824  };
825 
826  tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
827  return tensor;
828 }
829 
830 /** Get the pointer position of a Vector
831  *
832  * @param[in] vec Pointer to the starting position of the buffer
833  * @param[in] x Relative X position
834  */
835 inline __global const uchar *vector_offset(const Vector *vec, int x)
836 {
837  return vec->ptr + x * vec->stride_x;
838 }
839 
840 /** Get the pointer position of a Image
841  *
842  * @param[in] img Pointer to the starting position of the buffer
843  * @param[in] x Relative X position
844  * @param[in] y Relative Y position
845  */
846 inline __global uchar *offset(const Image *img, int x, int y)
847 {
848  return img->ptr + x * img->stride_x + y * img->stride_y;
849 }
850 
851 /** Get the pointer position of a Tensor3D
852  *
853  * @param[in] tensor Pointer to the starting position of the buffer
854  * @param[in] x Relative X position
855  * @param[in] y Relative Y position
856  * @param[in] z Relative Z position
857  */
858 inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
859 {
860  return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
861 }
862 
863 /** Get the pointer position of a Tensor4D
864  *
865  * @param[in] tensor Pointer to the starting position of the buffer
866  * @param[in] x Relative X position
867  * @param[in] y Relative Y position
868  * @param[in] z Relative Z position
869  * @param[in] w Relative W position
870  */
871 inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
872 {
873  return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
874 }
875 
876 /** Get the offset for a given linear index of a Tensor3D
877  *
878  * @param[in] tensor Pointer to the starting position of the buffer
879  * @param[in] width Width of the input tensor
880  * @param[in] height Height of the input tensor
881  * @param[in] depth Depth of the input tensor
882  * @param[in] index Linear index
883  */
884 inline __global const uchar *tensor3D_index2ptr(const Tensor3D *tensor, uint width, uint height, uint depth, uint index)
885 {
886  uint num_elements = width * height;
887 
888  const uint z = index / num_elements;
889 
890  index %= num_elements;
891 
892  const uint y = index / width;
893 
894  index %= width;
895 
896  const uint x = index;
897 
898  return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + tensor->offset_first_element_in_bytes;
899 }
900 
901 #endif // _HELPER_H
Structure to hold Vector information.
Definition: helpers.h:651
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:846
SimpleTensor< float > w
Definition: DFT.cpp:156
Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Wrap image information into an Image structure, and make the pointer point at this workitem&#39;s data...
Definition: helpers.h:720
Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem&#39;s da...
Definition: helpers.h:772
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:680
int stride_z
Stride of the image in Z dimension (in bytes)
Definition: helpers.h:674
struct Image Image
Structure to hold Image information.
struct Tensor3D Tensor3D
Structure to hold 3D tensor information.
__global const uchar * tensor3D_index2ptr(const Tensor3D *tensor, uint width, uint height, uint depth, uint index)
Get the offset for a given linear index of a Tensor3D.
Definition: helpers.h:884
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:672
struct Tensor4D Tensor4D
Structure to hold 4D tensor information.
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:662
Structure to hold 3D tensor information.
Definition: helpers.h:668
Structure to hold 4D tensor information.
Definition: helpers.h:678
int stride_w
Stride of the image in W dimension (in bytes)
Definition: helpers.h:685
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
Get the pointer position of a Tensor4D.
Definition: helpers.h:871
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:655
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:653
Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into an image structure, and make the pointer point at this workitem&#39;s dat...
Definition: helpers.h:746
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:683
Structure to hold Image information.
Definition: helpers.h:659
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:654
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:681
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:661
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
Definition: helpers.h:835
struct Vector Vector
Structure to hold Vector information.
Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Wrap vector information into an Vector structure, and make the pointer point at this workitem&#39;s data...
Definition: helpers.h:697
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:670
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:682
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:673
int stride_z
Stride of the image in Z dimension (in bytes)
Definition: helpers.h:684
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:671
Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)
Definition: helpers.h:812
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:664
Tensor3D tensor3D_ptr_no_update(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into an tensor structure.
Definition: helpers.h:799
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.
Definition: helpers.h:858
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:663