Compute Library
 21.08
helpers.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_HELPER_H
25 #define ARM_COMPUTE_HELPER_H
26 
27 #include "load_store_utility.h"
28 
29 #if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
30 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
31 #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
32 
33 #if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
34 #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
35 #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
36 
37 #if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
38 #pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
39 #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
40 
41 #if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
42 #pragma OPENCL EXTENSION cl_arm_printf : enable
43 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
44 
/** Numeric identifiers of the GPU architectures (plain integer literals, so they can be compared in #if). */
#define GPU_ARCH_MIDGARD 0x100
#define GPU_ARCH_BIFROST 0x200
47 
/** Concatenate two inputs into a single token.
 *
 * @note The operands are pasted exactly as written: a macro passed as @p a or @p b is NOT expanded first.
 *
 * @param[in] a The first input to be concatenated
 * @param[in] b The second input to be concatenated
 *
 * @return The concatenated output
 */
#define CONCAT(a, b) a##b
56 
/** Expand the given vector (identity macro: the argument is macro-expanded and emitted unchanged).
 *
 * @param[in] x The vector to be expanded
 *
 * @return The expanded output
 */
#define EXPAND(x) x
64 
/** Clamp the given value between an upper and lower bound.
 *
 * Expands to min(max(x, min_val), max_val), i.e. the lower bound is applied first.
 *
 * @param[in] x       The value to be clamped
 * @param[in] min_val The lower bound
 * @param[in] max_val The upper bound
 *
 * @return The clamped value.
 */
#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
74 
/** REVn reverses the given vector whose size is n.
 * @name REVn
 *
 * Each variant is a component swizzle listing the lanes from last to first;
 * REV1 returns its (scalar) argument unchanged.
 *
 * @param[in] x The vector to be reversed
 *
 * @return The reversed vector
 * @{
 */
#define REV1(x) ((x))
#define REV2(x) ((x).s10)
#define REV3(x) ((x).s210)
#define REV4(x) ((x).s3210)
#define REV8(x) ((x).s76543210)
#define REV16(x) ((x).sFEDCBA9876543210)
/** @} */ // end of group REVn
90 
/** Reverse the given vector.
 * @name REVERSE
 *
 * REVERSE goes through REVERSE_STR so that a macro passed as @p s is expanded
 * before it is pasted onto the REV prefix.
 *
 * @param[in] x The vector to be reversed
 * @param[in] s The size of the vector
 *
 * @return The reversed vector
 * @{
 */
#define REVERSE_STR(x, s) REV##s((x))
#define REVERSE(x, s) REVERSE_STR(x, s)
/** @} */ // end of group REVERSE
103 
/** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
 * @name ROTs_n
 *
 * Each variant is a component swizzle; rotating by 0 or by the full size s
 * returns the argument unchanged.
 *
 * @param[in] x The vector to be shifted
 *
 * @return The shifted vector
 * @{
 */
// Size 1
#define ROT1_0(x) ((x))
#define ROT1_1(x) ((x))

// Size 2
#define ROT2_0(x) ((x))
#define ROT2_1(x) ((x).s10)
#define ROT2_2(x) ((x))

// Size 3
#define ROT3_0(x) ((x))
#define ROT3_1(x) ((x).s201)
#define ROT3_2(x) ((x).s120)
#define ROT3_3(x) ((x))

// Size 4
#define ROT4_0(x) ((x))
#define ROT4_1(x) ((x).s3012)
#define ROT4_2(x) ((x).s2301)
#define ROT4_3(x) ((x).s1230)
#define ROT4_4(x) ((x))

// Size 8
#define ROT8_0(x) ((x))
#define ROT8_1(x) ((x).s70123456)
#define ROT8_2(x) ((x).s67012345)
#define ROT8_3(x) ((x).s56701234)
#define ROT8_4(x) ((x).s45670123)
#define ROT8_5(x) ((x).s34567012)
#define ROT8_6(x) ((x).s23456701)
#define ROT8_7(x) ((x).s12345670)
#define ROT8_8(x) ((x))

// Size 16
#define ROT16_0(x) ((x))
#define ROT16_1(x) ((x).sF0123456789ABCDE)
#define ROT16_2(x) ((x).sEF0123456789ABCD)
#define ROT16_3(x) ((x).sDEF0123456789ABC)
#define ROT16_4(x) ((x).sCDEF0123456789AB)
#define ROT16_5(x) ((x).sBCDEF0123456789A)
#define ROT16_6(x) ((x).sABCDEF0123456789)
#define ROT16_7(x) ((x).s9ABCDEF012345678)
#define ROT16_8(x) ((x).s89ABCDEF01234567)
#define ROT16_9(x) ((x).s789ABCDEF0123456)
#define ROT16_10(x) ((x).s6789ABCDEF012345)
#define ROT16_11(x) ((x).s56789ABCDEF01234)
#define ROT16_12(x) ((x).s456789ABCDEF0123)
#define ROT16_13(x) ((x).s3456789ABCDEF012)
#define ROT16_14(x) ((x).s23456789ABCDEF01)
#define ROT16_15(x) ((x).s123456789ABCDEF0)
#define ROT16_16(x) ((x))
/** @} */ // end of group ROTs_n
158 
/** Circular-right-shift (rotate-right) the given vector by the given amount.
 * @name ROTATE
 *
 * ROTATE goes through ROTATE_STR so that macros passed as @p s or @p n are
 * expanded before being pasted into the ROT<s>_<n> name.
 *
 * @param[in] x The vector to be shifted
 * @param[in] s The size of the vector
 * @param[in] n The amount to be shifted
 *
 * @return The shifted vector
 * @{
 */
#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
/** @} */ // end of group ROTATE
172 
/** Creates a vector of size n filled with offset values corresponding to the location of each element.
 * @name V_OFFSn
 *
 * The result is the vector literal (dt<n>)(0, 1, ..., n-1).
 *
 * @param[in] dt The data type of the output vector
 *
 * @return The vector filled with offset values
 * @{
 */
#define V_OFFS1(dt) (dt##1)(0)
#define V_OFFS2(dt) (dt##2)(0, 1)
#define V_OFFS3(dt) (dt##3)(0, 1, 2)
#define V_OFFS4(dt) (dt##4)(0, 1, 2, 3)
#define V_OFFS8(dt) (dt##8)(0, 1, 2, 3, 4, 5, 6, 7)
#define V_OFFS16(dt) (dt##16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
/** @} */ // end of group V_OFFSn
188 
/** Create a vector filled with offset values corresponding to the location of each element.
 * @name VEC_OFFS
 *
 * VEC_OFFS goes through VEC_OFFS_STR so that a macro passed as @p s is expanded
 * before it is pasted onto the V_OFFS prefix.
 *
 * @param[in] dt The data type of the output vector
 * @param[in] s  The size of the output vector
 *
 * @return The vector filled with offset values
 * @{
 */
#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
/** @} */ // end of group VEC_OFFS
201 
/** Select the vload<size> routine for the given vector size.
 *
 * VLOAD goes through VLOAD_STR so that a macro passed as @p size is expanded
 * before it is pasted onto the vload prefix.
 *
 * @param[in] size The vector size
 *
 * @return The name vload<size>
 */
#define VLOAD_STR(size) vload##size
#define VLOAD(size) VLOAD_STR(size)
204 
/** Number of pixels covered by a vector of 4, 8 and 16 elements respectively (four elements per pixel). */
#define PIXEL_UNIT4 1
#define PIXEL_UNIT8 2
#define PIXEL_UNIT16 4

/** Utility macro to convert a vector size in pixel unit.
 *
 * @name CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
 *
 * The _STR helper pastes the size onto the PIXEL_UNIT prefix; the outer macro
 * exists so that a macro passed as @p vec_size is expanded first.
 *
 * @param[in] vec_size Vector size. Only 4,8 and 16 is supported
 *
 * @return The pixel unit (number of pixels)
 * @{
 */
#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size) PIXEL_UNIT##vec_size
#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(vec_size) CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size)
/** @} */ // end of group CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
221 
/** Read 1, 2 or 4 horizontally adjacent pixels from a 2D image and pack them into one vector.
 *
 * Pixel k is read at (x_coord + k, y_coord); every pixel contributes four elements.
 *
 * @note Each expansion already ends with ';', so these macros can only be used as the
 *       last part of a statement (e.g. the right-hand side of an assignment).
 */
#define read_image2d_floatx1(img, x_coord, y_coord) (float4)(read_imagef(img, (int2)(x_coord, y_coord)));
#define read_image2d_floatx2(img, x_coord, y_coord) (float8)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)));
#define read_image2d_floatx4(img, x_coord, y_coord) (float16)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)), read_imagef(img, (int2)(x_coord + 2, y_coord)), read_imagef(img, (int2)(x_coord + 3, y_coord)));

// Half-precision variants, available only when fp16 support is requested and offered by the device
#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
#define read_image2d_halfx1(img, x_coord, y_coord) (half4)(read_imageh(img, (int2)(x_coord, y_coord)));
#define read_image2d_halfx2(img, x_coord, y_coord) (half8)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)));
#define read_image2d_halfx4(img, x_coord, y_coord) (half16)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)), read_imageh(img, (int2)(x_coord + 2, y_coord)), read_imageh(img, (int2)(x_coord + 3, y_coord)));
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
231 
/** Utility macro to read a 2D OpenCL image object.
 *
 * Dispatches to read_image2d_<data_type>x<n0>. READ_IMAGE2D goes through READ_IMAGE2D_STR
 * so that macros passed as @p data_type or @p n0 are expanded before token pasting.
 *
 * @note Coordinates are not normalized
 *
 * @param[in] data_type Data type
 * @param[in] n0        Number of pixel to read. Only 1,2 and 4 is supported
 * @param[in] img       OpenCL image object
 * @param[in] x_coord   The x coordinate for the top-left pixel
 * @param[in] y_coord   The y coordinate for the top-left pixel
 *
 * @return Pixels from the 2D OpenCL image object
 * @{
 */
#define READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord) read_image2d_##data_type##x##n0(img, x_coord, y_coord)
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord) READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)
/** @} */ // end of group READ_IMAGE2D
247 
/** Select the vstore<size> routine for the given vector size.
 *
 * VSTORE goes through VSTORE_STR so that a macro passed as @p size is expanded
 * before it is pasted onto the vstore prefix.
 *
 * @param[in] size The vector size
 *
 * @return The name vstore<size>
 */
#define VSTORE_STR(size) vstore##size
#define VSTORE(size) VSTORE_STR(size)
250 
// "Vector of width 1" aliases: <type>1 names the plain scalar type, so names built by
// pasting a size onto a type (e.g. type##size) also resolve when the size is 1.
#define float1 float
#define half1 half
#define char1 char
#define uchar1 uchar
#define short1 short
#define ushort1 ushort
#define int1 int
#define uint1 uint
#define long1 long
#define ulong1 ulong
#define double1 double
262 
/** Scalar counterparts of vloadn / vstoren for a "vector" of width 1.
 *
 * vload1 yields the element located OFFSET elements after PTR; vstore1 assigns DATA to it.
 * Every argument and the whole expansion are parenthesised so that arbitrary expressions
 * (conditionals, casts, sums) can be passed and the result can be embedded in a larger expression.
 *
 * @param[in] DATA   (vstore1 only) The value to store
 * @param[in] OFFSET Offset in elements from @p PTR
 * @param[in] PTR    The base pointer
 */
#define vload1(OFFSET, PTR) (*((OFFSET) + (PTR)))
#define vstore1(DATA, OFFSET, PTR) (*((OFFSET) + (PTR)) = (DATA))
265 
/** Extended partial vstore that correctly handles scalar values as well.
 * Store the **lower** 0 to (n-1)th elements of the given vector while minimising the amount of vstore ops
 * @name VSTORE_PARTIAL
 *
 * The macro only builds the name vstore_partial_<size>_<store_size>; the arguments
 * (data, offset, pointer) are supplied in a second pair of parentheses.
 *
 * @note With this macro, the passed data can be both a vector and a scalar
 * @note @p store_size needs to be <= @p size
 * eg 1: Valid
 * VSTORE_PARTIAL(16, 15) ...;
 * eg 2: Invalid
 * VSTORE_PARTIAL(4, 7) ...;
 *
 * @param[in] size       The width of @p DATA. Supported values: 1(scalar), 2, 3, 4, 8, 16
 * @param[in] store_size The number of lower elements to store. Supported values: 1-16, but has to be <= @p size
 * @{
 */
#define VSTORE_PARTIAL_STR(size, store_size) vstore_partial_##size##_##store_size
#define VSTORE_PARTIAL(size, store_size) VSTORE_PARTIAL_STR(size, store_size)

/** Placeholder store: accepts the same arguments as a store routine and expands to an empty block. */
#define NO_STORE(data, offs, ptr) \
    {                             \
    }
287 
// Dispatch table used by VSTORE_PARTIAL: vstore_partial_<size>_<store_size> names the routine
// that stores the lowest <store_size> elements of a <size>-wide value. A store_size of 0, or
// one larger than <size>, maps to NO_STORE.

// Size == 1 (scalar)
#define vstore_partial_1_0 NO_STORE
#define vstore_partial_1_1 vstore1
#define vstore_partial_1_2 NO_STORE
#define vstore_partial_1_3 NO_STORE
#define vstore_partial_1_4 NO_STORE
#define vstore_partial_1_5 NO_STORE
#define vstore_partial_1_6 NO_STORE
#define vstore_partial_1_7 NO_STORE
#define vstore_partial_1_8 NO_STORE
#define vstore_partial_1_9 NO_STORE
#define vstore_partial_1_10 NO_STORE
#define vstore_partial_1_11 NO_STORE
#define vstore_partial_1_12 NO_STORE
#define vstore_partial_1_13 NO_STORE
#define vstore_partial_1_14 NO_STORE
#define vstore_partial_1_15 NO_STORE
#define vstore_partial_1_16 NO_STORE

// Size == 2
#define vstore_partial_2_0 NO_STORE
#define vstore_partial_2_1 vstore_partial_1
#define vstore_partial_2_2 vstore_partial_2
#define vstore_partial_2_3 NO_STORE
#define vstore_partial_2_4 NO_STORE
#define vstore_partial_2_5 NO_STORE
#define vstore_partial_2_6 NO_STORE
#define vstore_partial_2_7 NO_STORE
#define vstore_partial_2_8 NO_STORE
#define vstore_partial_2_9 NO_STORE
#define vstore_partial_2_10 NO_STORE
#define vstore_partial_2_11 NO_STORE
#define vstore_partial_2_12 NO_STORE
#define vstore_partial_2_13 NO_STORE
#define vstore_partial_2_14 NO_STORE
#define vstore_partial_2_15 NO_STORE
#define vstore_partial_2_16 NO_STORE

// Size == 3
#define vstore_partial_3_0 NO_STORE
#define vstore_partial_3_1 vstore_partial_1
#define vstore_partial_3_2 vstore_partial_2
#define vstore_partial_3_3 vstore_partial_3
#define vstore_partial_3_4 NO_STORE
#define vstore_partial_3_5 NO_STORE
#define vstore_partial_3_6 NO_STORE
#define vstore_partial_3_7 NO_STORE
#define vstore_partial_3_8 NO_STORE
#define vstore_partial_3_9 NO_STORE
#define vstore_partial_3_10 NO_STORE
#define vstore_partial_3_11 NO_STORE
#define vstore_partial_3_12 NO_STORE
#define vstore_partial_3_13 NO_STORE
#define vstore_partial_3_14 NO_STORE
#define vstore_partial_3_15 NO_STORE
#define vstore_partial_3_16 NO_STORE

// Size == 4
#define vstore_partial_4_0 NO_STORE
#define vstore_partial_4_1 vstore_partial_1
#define vstore_partial_4_2 vstore_partial_2
#define vstore_partial_4_3 vstore_partial_3
#define vstore_partial_4_4 vstore_partial_4
#define vstore_partial_4_5 NO_STORE
#define vstore_partial_4_6 NO_STORE
#define vstore_partial_4_7 NO_STORE
#define vstore_partial_4_8 NO_STORE
#define vstore_partial_4_9 NO_STORE
#define vstore_partial_4_10 NO_STORE
#define vstore_partial_4_11 NO_STORE
#define vstore_partial_4_12 NO_STORE
#define vstore_partial_4_13 NO_STORE
#define vstore_partial_4_14 NO_STORE
#define vstore_partial_4_15 NO_STORE
#define vstore_partial_4_16 NO_STORE

// Size == 8
#define vstore_partial_8_0 NO_STORE
#define vstore_partial_8_1 vstore_partial_1
#define vstore_partial_8_2 vstore_partial_2
#define vstore_partial_8_3 vstore_partial_3
#define vstore_partial_8_4 vstore_partial_4
#define vstore_partial_8_5 vstore_partial_5
#define vstore_partial_8_6 vstore_partial_6
#define vstore_partial_8_7 vstore_partial_7
#define vstore_partial_8_8 vstore_partial_8
#define vstore_partial_8_9 NO_STORE
#define vstore_partial_8_10 NO_STORE
#define vstore_partial_8_11 NO_STORE
#define vstore_partial_8_12 NO_STORE
#define vstore_partial_8_13 NO_STORE
#define vstore_partial_8_14 NO_STORE
#define vstore_partial_8_15 NO_STORE
#define vstore_partial_8_16 NO_STORE

// Size == 16
#define vstore_partial_16_0 NO_STORE
#define vstore_partial_16_1 vstore_partial_1
#define vstore_partial_16_2 vstore_partial_2
#define vstore_partial_16_3 vstore_partial_3
#define vstore_partial_16_4 vstore_partial_4
#define vstore_partial_16_5 vstore_partial_5
#define vstore_partial_16_6 vstore_partial_6
#define vstore_partial_16_7 vstore_partial_7
#define vstore_partial_16_8 vstore_partial_8
#define vstore_partial_16_9 vstore_partial_9
#define vstore_partial_16_10 vstore_partial_10
#define vstore_partial_16_11 vstore_partial_11
#define vstore_partial_16_12 vstore_partial_12
#define vstore_partial_16_13 vstore_partial_13
#define vstore_partial_16_14 vstore_partial_14
#define vstore_partial_16_15 vstore_partial_15
#define vstore_partial_16_16 vstore_partial_16
396 
/** Partial vstore. Store the **lower** 0 to (n-1)th elements of the given vector while minimising the amount of vstore ops
 * @name vstore_partial_n
 *
 * @note @p DATA needs to be a vector not a scalar
 * @note n needs to be <= the vector width of the input variable @p DATA
 * eg 1: Valid
 * vstore_partial_15(var:float16, 0, 0xabcd);
 * eg 2: Invalid
 * vstore_partial_7(var:float4, 0, 0xabcd);
 *
 * @note in cases n == 1, 2, 3, 4, 8, 16, no extra vstore is invoked, thus there's no performance penalty.
 * @note Variants that need more than one store expand to one brace-enclosed block, so the whole
 *       store stays a single statement when used as the body of an unbraced if/for/while.
 * @note NOTE(review): the multi-store variants shift @p PTR by the number of elements already written
 *       and forward @p OFFSET unchanged to stores of different widths; this looks consistent only
 *       for @p OFFSET == 0 - confirm against the call sites.
 *
 * @param[in] DATA   The name of the variable
 * @param[in] OFFSET Offset in n
 * @param[in] PTR    The base pointer
 * @{
 */
#define vstore_partial_1(DATA, OFFSET, PTR) \
    vstore1((DATA).s0, OFFSET, PTR);

#define vstore_partial_2(DATA, OFFSET, PTR) \
    vstore2((DATA).s01, OFFSET, PTR);

#define vstore_partial_3(DATA, OFFSET, PTR) \
    vstore3((DATA).s012, OFFSET, PTR);

#define vstore_partial_4(DATA, OFFSET, PTR) \
    vstore4((DATA).s0123, OFFSET, PTR);

#define vstore_partial_5(DATA, OFFSET, PTR)          \
    {                                                \
        vstore_partial_4((DATA).s0123, OFFSET, PTR); \
        vstore1((DATA).s4, OFFSET, (PTR) + 4);       \
    }

#define vstore_partial_6(DATA, OFFSET, PTR)                 \
    {                                                       \
        vstore_partial_4((DATA).s0123, OFFSET, PTR);        \
        vstore_partial_2((DATA).s45, OFFSET, (PTR) + 4);    \
    }

#define vstore_partial_7(DATA, OFFSET, PTR)                 \
    {                                                       \
        vstore_partial_4((DATA).s0123, OFFSET, PTR);        \
        vstore_partial_3((DATA).s456, OFFSET, (PTR) + 4);   \
    }

#define vstore_partial_8(DATA, OFFSET, PTR) \
    vstore8((DATA).s01234567, OFFSET, PTR);

#define vstore_partial_9(DATA, OFFSET, PTR)              \
    {                                                    \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR); \
        vstore1((DATA).s8, OFFSET, (PTR) + 8);           \
    }

#define vstore_partial_10(DATA, OFFSET, PTR)                \
    {                                                       \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);    \
        vstore_partial_2((DATA).s89, OFFSET, (PTR) + 8);    \
    }

#define vstore_partial_11(DATA, OFFSET, PTR)                \
    {                                                       \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);    \
        vstore_partial_3((DATA).s89a, OFFSET, (PTR) + 8);   \
    }

#define vstore_partial_12(DATA, OFFSET, PTR)                \
    {                                                       \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);    \
        vstore_partial_4((DATA).s89ab, OFFSET, (PTR) + 8);  \
    }

// For 13-15 the whole upper half is handed over; the callee stores only its lowest 5/6/7 elements.
#define vstore_partial_13(DATA, OFFSET, PTR)                    \
    {                                                           \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);        \
        vstore_partial_5((DATA).s89abcdef, OFFSET, (PTR) + 8);  \
    }

#define vstore_partial_14(DATA, OFFSET, PTR)                    \
    {                                                           \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);        \
        vstore_partial_6((DATA).s89abcdef, OFFSET, (PTR) + 8);  \
    }

#define vstore_partial_15(DATA, OFFSET, PTR)                    \
    {                                                           \
        vstore_partial_8((DATA).s01234567, OFFSET, PTR);        \
        vstore_partial_7((DATA).s89abcdef, OFFSET, (PTR) + 8);  \
    }

#define vstore_partial_16(DATA, OFFSET, PTR) \
    vstore16(DATA, OFFSET, PTR);
/** @} */ // end of group vstore_partial_n
/** @} */ // end of group VSTORE_PARTIAL
473 
// Convert built-in functions with _sat modifier are not supported in floating point so we create defines
// without _sat to overcome this issue. Every name maps to the plain conversion of the SAME destination
// type and width (the width-1 spellings map to the scalar conversion).
#define convert_float_sat convert_float
#define convert_float1_sat convert_float
#define convert_float2_sat convert_float2
#define convert_float3_sat convert_float3
#define convert_float4_sat convert_float4
#define convert_float8_sat convert_float8
#define convert_float16_sat convert_float16
#define convert_half_sat convert_half
#define convert_half1_sat convert_half
#define convert_half2_sat convert_half2
#define convert_half3_sat convert_half3
#define convert_half4_sat convert_half4
#define convert_half8_sat convert_half8
#define convert_half16_sat convert_half16
490 
// Width-1 spellings of the conversion built-ins: convert_<type>1 maps to the scalar convert_<type>,
// so names built as convert_##type with a "<type>1" type resolve as well.
#define convert_float1 convert_float
#define convert_half1 convert_half
#define convert_char1 convert_char
#define convert_uchar1 convert_uchar
#define convert_short1 convert_short
#define convert_ushort1 convert_ushort
#define convert_int1 convert_int
#define convert_uint1 convert_uint
#define convert_long1 convert_long
#define convert_ulong1 convert_ulong
#define convert_double1 convert_double
502 
// Width-1 spellings of the saturating conversions: convert_<type>1_sat maps to the scalar
// convert_<type>_sat for the integer types.
#define convert_char1_sat convert_char_sat
#define convert_uchar1_sat convert_uchar_sat
// The following five map each name onto itself; a self-referential object-like macro is not
// re-expanded, so they leave the built-in names untouched.
#define convert_uchar2_sat convert_uchar2_sat
#define convert_uchar3_sat convert_uchar3_sat
#define convert_uchar4_sat convert_uchar4_sat
#define convert_uchar8_sat convert_uchar8_sat
#define convert_uchar16_sat convert_uchar16_sat
#define convert_short1_sat convert_short_sat
#define convert_ushort1_sat convert_ushort_sat
#define convert_int1_sat convert_int_sat
#define convert_uint1_sat convert_uint_sat
#define convert_long1_sat convert_long_sat
#define convert_ulong1_sat convert_ulong_sat
// _sat is not available for floating-point destinations, so the double variant maps to the
// plain conversion.
#define convert_double1_sat convert_double
517 
/** Build the vector type name <type><size>. */
#define VEC_DATA_TYPE_STR(type, size) type##size
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)

/** Convert @p x to @p type through convert_<type>. */
#define CONVERT_STR(x, type) (convert_##type((x)))
#define CONVERT(x, type) CONVERT_STR(x, type)

/** Convert @p x to @p type through convert_<type>_sat. */
#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)

/** Convert @p x to @p type through convert_<type>_sat_<round>. */
#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)

// In every pair above the non-_STR macro exists so that macro arguments (e.g. a type passed
// as a build option) are expanded before the _STR macro pastes them into a name.
529 
// Per-type lookup for SELECT_VEC_DATA_TYPE: integer types map to themselves,
// half maps to short and float maps to int.
#define select_vec_dt_uchar(size) uchar##size
#define select_vec_dt_char(size) char##size
#define select_vec_dt_ushort(size) ushort##size
#define select_vec_dt_short(size) short##size
#define select_vec_dt_half(size) short##size
#define select_vec_dt_uint(size) uint##size
#define select_vec_dt_int(size) int##size
#define select_vec_dt_float(size) int##size
#define select_vec_dt_ulong(size) ulong##size
#define select_vec_dt_long(size) long##size

/** Integer (vector) type associated with @p type for select-style masks.
 *
 * @param[in] type The element data type
 * @param[in] size The vector size (SELECT_DATA_TYPE uses 1)
 *
 * @return The type name produced by select_vec_dt_<type>(size)
 */
#define SELECT_VEC_DATA_TYPE_STR(type, size) select_vec_dt_##type(size)
#define SELECT_VEC_DATA_TYPE(type, size) SELECT_VEC_DATA_TYPE_STR(type, size)
#define SELECT_DATA_TYPE(type) SELECT_VEC_DATA_TYPE_STR(type, 1)
544 
// Per-type lookup for SIGNED_INT_VEC_DATA_TYPE: every type maps to a signed integer type
// (uchar/char -> char, ushort/short/half -> short, uint/int/float -> int, ulong/long -> long).
#define signed_int_vec_dt_uchar(size) char##size
#define signed_int_vec_dt_char(size) char##size
#define signed_int_vec_dt_ushort(size) short##size
#define signed_int_vec_dt_short(size) short##size
#define signed_int_vec_dt_half(size) short##size
#define signed_int_vec_dt_uint(size) int##size
#define signed_int_vec_dt_int(size) int##size
#define signed_int_vec_dt_float(size) int##size
#define signed_int_vec_dt_ulong(size) long##size
#define signed_int_vec_dt_long(size) long##size

/** Signed integer (vector) type associated with @p type.
 *
 * @param[in] type The element data type
 * @param[in] size The vector size (SIGNED_INT_DATA_TYPE uses 1)
 *
 * @return The type name produced by signed_int_vec_dt_<type>(size)
 */
#define SIGNED_INT_VEC_DATA_TYPE_STR(type, size) signed_int_vec_dt_##type(size)
#define SIGNED_INT_VEC_DATA_TYPE(type, size) SIGNED_INT_VEC_DATA_TYPE_STR(type, size)
#define SIGNED_INT_DATA_TYPE(type) SIGNED_INT_VEC_DATA_TYPE_STR(type, 1)
559 
/** Sum of all elements of a vector of the given size.
 *
 * Each variant splits the vector into halves and adds the partial sums. Every expansion is
 * fully parenthesised so it can be used as an operand of a larger expression
 * (e.g. 2 * SUM_REDUCE(v, 4)).
 *
 * @note Because of the parentheses, the elements are added pairwise, (s0 + s1) + (s2 + s3), ...
 *       rather than strictly left to right.
 *
 * @param[in] x    The vector to be reduced
 * @param[in] size The size of the vector. Supported values: 1, 2, 3, 4, 8, 16
 *
 * @return The sum of the elements of @p x
 */
#define sum_reduce_1(x) (x)
#define sum_reduce_2(x) (((x).s0) + ((x).s1))
#define sum_reduce_3(x) (sum_reduce_2((x).s01) + ((x).s2))
#define sum_reduce_4(x) (sum_reduce_2((x).s01) + sum_reduce_2((x).s23))
#define sum_reduce_8(x) (sum_reduce_4((x).s0123) + sum_reduce_4((x).s4567))
#define sum_reduce_16(x) (sum_reduce_8((x).s01234567) + sum_reduce_8((x).s89ABCDEF))

#define SUM_REDUCE_STR(x, size) sum_reduce_##size(x)
#define SUM_REDUCE(x, size) SUM_REDUCE_STR(x, size)
569 
/** Product of all elements of a vector of the given size.
 *
 * Each variant splits the vector into halves and multiplies the partial products. Every
 * expansion is fully parenthesised so it can be used as an operand of a larger expression
 * (e.g. a / PROD_REDUCE(v, 2)).
 *
 * @note Because of the parentheses, the elements are multiplied pairwise, (s0 * s1) * (s2 * s3), ...
 *       rather than strictly left to right.
 *
 * @param[in] x    The vector to be reduced
 * @param[in] size The size of the vector. Supported values: 1, 2, 3, 4, 8, 16
 *
 * @return The product of the elements of @p x
 */
#define prod_reduce_1(x) (x)
#define prod_reduce_2(x) (((x).s0) * ((x).s1))
#define prod_reduce_3(x) (prod_reduce_2((x).s01) * ((x).s2))
#define prod_reduce_4(x) (prod_reduce_2((x).s01) * prod_reduce_2((x).s23))
#define prod_reduce_8(x) (prod_reduce_4((x).s0123) * prod_reduce_4((x).s4567))
#define prod_reduce_16(x) (prod_reduce_8((x).s01234567) * prod_reduce_8((x).s89ABCDEF))

#define PROD_REDUCE_STR(x, size) prod_reduce_##size(x)
#define PROD_REDUCE(x, size) PROD_REDUCE_STR(x, size)
579 
/** Maximum over all elements of a vector of the given size.
 *
 * Each variant splits the vector into halves and combines the partial maxima with max().
 *
 * @param[in] x    The vector to be reduced
 * @param[in] size The size of the vector. Supported values: 1, 2, 3, 4, 8, 16
 *
 * @return The largest element of @p x
 */
#define max_reduce_1(x) (x)
#define max_reduce_2(x) max(((x).s0), ((x).s1))
#define max_reduce_3(x) max(max_reduce_2((x).s01), ((x).s2))
#define max_reduce_4(x) max(max_reduce_2((x).s01), max_reduce_2((x).s23))
#define max_reduce_8(x) max(max_reduce_4((x).s0123), max_reduce_4((x).s4567))
#define max_reduce_16(x) max(max_reduce_8((x).s01234567), max_reduce_8((x).s89ABCDEF))

#define MAX_REDUCE_STR(x, size) max_reduce_##size(x)
#define MAX_REDUCE(x, size) MAX_REDUCE_STR(x, size)
589 
/** Kernel parameter list describing a 1D buffer called @p name:
 * base pointer, X stride, X step and the byte offset of the first element.
 */
#define VECTOR_DECLARATION(name) \
    __global uchar *name##_ptr,  \
    uint name##_stride_x,        \
    uint name##_step_x,          \
    uint name##_offset_first_element_in_bytes
595 
/** Kernel parameter list describing a 2D buffer called @p name:
 * base pointer, stride and step for X and Y, and the byte offset of the first element.
 */
#define IMAGE_DECLARATION(name) \
    __global uchar *name##_ptr, \
    uint name##_stride_x,       \
    uint name##_step_x,         \
    uint name##_stride_y,       \
    uint name##_step_y,         \
    uint name##_offset_first_element_in_bytes
603 
/** Kernel parameter list describing a 3D buffer called @p name:
 * base pointer, stride and step for X, Y and Z, and the byte offset of the first element.
 */
#define TENSOR3D_DECLARATION(name) \
    __global uchar *name##_ptr,    \
    uint name##_stride_x,          \
    uint name##_step_x,            \
    uint name##_stride_y,          \
    uint name##_step_y,            \
    uint name##_stride_z,          \
    uint name##_step_z,            \
    uint name##_offset_first_element_in_bytes
613 
/** Kernel parameter list describing a 4D buffer called @p name:
 * base pointer, stride and step for X, Y, Z and W, and the byte offset of the first element.
 */
#define TENSOR4D_DECLARATION(name) \
    __global uchar *name##_ptr,    \
    uint name##_stride_x,          \
    uint name##_step_x,            \
    uint name##_stride_y,          \
    uint name##_step_y,            \
    uint name##_stride_z,          \
    uint name##_step_z,            \
    uint name##_stride_w,          \
    uint name##_step_w,            \
    uint name##_offset_first_element_in_bytes
625 
// Helpers that turn the parameters introduced by the *_DECLARATION(name) macros into the matching
// structure. The plain variants forward the declared steps; the _NO_STEP variants pass 0 for the
// steps instead.

// Vector
#define CONVERT_TO_VECTOR_STRUCT(name) \
    update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)

#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
    update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)

// Image
#define CONVERT_TO_IMAGE_STRUCT(name) \
    update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)

#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
    update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)

// 3D tensor viewed as an image
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
    update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)

// NOTE(review): unlike the other _NO_STEP variants this one still forwards the Z step; only the
// X and Y steps are zeroed. Presumably intentional - confirm before changing.
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
    update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)

// 3D tensor
#define CONVERT_TO_TENSOR3D_STRUCT(name)                                                                                                           \
    update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
                                 name##_stride_z, name##_step_z)

#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
    update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)

// 4D tensor (mod_size is forwarded unchanged as the last argument)
#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size)                                                                                                 \
    update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
                                 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)

#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
    update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)

// 3D tensor through tensor3D_ptr_no_update
#define CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR(name)                                                                                       \
    tensor3D_ptr_no_update(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
                           name##_stride_z, name##_step_z)
664 
665 /** Structure to hold Vector information */
666 typedef struct Vector
667 {
668  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
669  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
670  int stride_x; /**< Stride of the image in X dimension (in bytes) */
671 } Vector;
672 
673 /** Structure to hold Image information */
674 typedef struct Image
675 {
676  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
677  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
678  int stride_x; /**< Stride of the image in X dimension (in bytes) */
679  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
680 } Image;
681 
682 /** Structure to hold 3D tensor information */
683 typedef struct Tensor3D
684 {
685  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
686  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
687  int stride_x; /**< Stride of the image in X dimension (in bytes) */
688  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
689  int stride_z; /**< Stride of the image in Z dimension (in bytes) */
690 } Tensor3D;
691 
692 /** Structure to hold 4D tensor information */
693 typedef struct Tensor4D
694 {
695  __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
696  int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
697  int stride_x; /**< Stride of the image in X dimension (in bytes) */
698  int stride_y; /**< Stride of the image in Y dimension (in bytes) */
699  int stride_z; /**< Stride of the image in Z dimension (in bytes) */
700  int stride_w; /**< Stride of the image in W dimension (in bytes) */
701 } Tensor4D;
702 
703 /** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
704  *
705  * @param[in] ptr Pointer to the starting postion of the buffer
706  * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
707  * @param[in] stride_x Stride of the vector in X dimension (in bytes)
708  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
709  *
710  * @return An image object
711  */
712 inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
713 {
714  Vector vector =
715  {
716  .ptr = ptr,
717  .offset_first_element_in_bytes = offset_first_element_in_bytes,
718  .stride_x = stride_x,
719  };
720  vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
721  return vector;
722 }
723 
724 /** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
725  *
726  * @param[in] ptr Pointer to the starting postion of the buffer
727  * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
728  * @param[in] stride_x Stride of the image in X dimension (in bytes)
729  * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
730  * @param[in] stride_y Stride of the image in Y dimension (in bytes)
731  * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
732  *
733  * @return An image object
734  */
735 inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
736 {
737  Image img =
738  {
739  .ptr = ptr,
740  .offset_first_element_in_bytes = offset_first_element_in_bytes,
741  .stride_x = stride_x,
742  .stride_y = stride_y
743  };
744  img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
745  return img;
746 }
747 
/** Build an Image structure from 3D tensor arguments and advance its pointer to the data of the calling work-item.
 *
 * Only the X and Y strides are stored in the returned structure; @p stride_z is not kept.
 * The Z dimension only contributes to the pointer through get_global_id(2) * @p step_z.
 *
 * @param[in] ptr                           Pointer to the starting position of the buffer
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the image in Z dimension (in bytes). Not used by this function.
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return An image object
 */
inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    Image image =
    {
        .ptr                           = ptr,
        .offset_first_element_in_bytes = offset_first_element_in_bytes,
        .stride_x                      = stride_x,
        .stride_y                      = stride_y
    };

    // Byte displacement contributed by each global id of the work-item.
    const size_t x_bytes = get_global_id(0) * step_x;
    const size_t y_bytes = get_global_id(1) * step_y;
    const size_t z_bytes = get_global_id(2) * step_z;

    image.ptr = image.ptr + (image.offset_first_element_in_bytes + x_bytes + y_bytes + z_bytes);
    return image;
}
773 
/** Build a Tensor3D structure from raw buffer arguments and advance its pointer to the data of the calling work-item.
 *
 * The returned pointer is @p ptr + @p offset_first_element_in_bytes + get_global_id(0) * @p step_x
 * + get_global_id(1) * @p step_y + get_global_id(2) * @p step_z.
 *
 * @param[in] ptr                           Pointer to the starting position of the buffer
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return A 3D tensor object
 */
inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    Tensor3D t3d =
    {
        .ptr                           = ptr,
        .offset_first_element_in_bytes = offset_first_element_in_bytes,
        .stride_x                      = stride_x,
        .stride_y                      = stride_y,
        .stride_z                      = stride_z
    };

    // Byte displacement contributed by each global id of the work-item.
    const size_t x_bytes = get_global_id(0) * step_x;
    const size_t y_bytes = get_global_id(1) * step_y;
    const size_t z_bytes = get_global_id(2) * step_z;

    t3d.ptr = t3d.ptr + (t3d.offset_first_element_in_bytes + x_bytes + y_bytes + z_bytes);
    return t3d;
}
800 
/** Build a Tensor3D structure from raw buffer arguments without moving the pointer.
 *
 * The returned structure holds @p ptr unchanged; neither the first-element offset nor any
 * work-item displacement is applied to it.
 *
 * @param[in] ptr                           Pointer to the starting position of the buffer
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes). Not used by this function.
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes). Not used by this function.
 * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes). Not used by this function.
 *
 * @return A 3D tensor object
 */
inline Tensor3D tensor3D_ptr_no_update(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    // The step arguments are accepted but intentionally left unused: the pointer stays at the buffer start.
    const Tensor3D t3d =
    {
        .ptr                           = ptr,
        .offset_first_element_in_bytes = offset_first_element_in_bytes,
        .stride_x                      = stride_x,
        .stride_y                      = stride_y,
        .stride_z                      = stride_z
    };
    return t3d;
}
826 
/** Build a Tensor4D structure from raw buffer arguments and advance its pointer to the data of the calling work-item.
 *
 * The Z and W coordinates are both derived from get_global_id(2):
 * the Z step is applied to get_global_id(2) % @p mod_size and the W step to get_global_id(2) / @p mod_size.
 *
 * @param[in] ptr                           Pointer to the starting position of the buffer
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        Bytes to advance per unit of get_global_id(0)
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        Bytes to advance per unit of get_global_id(1)
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes)
 * @param[in] step_z                        Bytes to advance per unit of (get_global_id(2) % mod_size)
 * @param[in] stride_w                      Stride of the tensor in W dimension (in bytes)
 * @param[in] step_w                        Bytes to advance per unit of (get_global_id(2) / mod_size)
 * @param[in] mod_size                      Divisor used to split get_global_id(2) into its Z and W parts
 *
 * @return A 4D tensor object
 */
inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
                                             uint step_w,
                                             uint mod_size)
{
    Tensor4D t4d =
    {
        .ptr                           = ptr,
        .offset_first_element_in_bytes = offset_first_element_in_bytes,
        .stride_x                      = stride_x,
        .stride_y                      = stride_y,
        .stride_z                      = stride_z,
        .stride_w                      = stride_w
    };

    // The third global id carries both Z (remainder) and W (quotient).
    const size_t gid_zw = get_global_id(2);

    const size_t x_bytes = get_global_id(0) * step_x;
    const size_t y_bytes = get_global_id(1) * step_y;
    const size_t z_bytes = (gid_zw % mod_size) * step_z;
    const size_t w_bytes = (gid_zw / mod_size) * step_w;

    t4d.ptr = t4d.ptr + (t4d.offset_first_element_in_bytes + x_bytes + y_bytes + z_bytes + w_bytes);
    return t4d;
}
844 
/** Compute the address of an element of a Vector relative to its current pointer.
 *
 * @param[in] vec Pointer to the Vector structure describing the buffer
 * @param[in] x   Relative X position
 *
 * @return Pointer located x * stride_x bytes away from the vector's current pointer
 */
inline __global const uchar *vector_offset(const Vector *vec, int x)
{
    __global const uchar *element = vec->ptr;
    element += x * vec->stride_x;
    return element;
}
854 
/** Compute the address of an element of an Image relative to its current pointer.
 *
 * @param[in] img Pointer to the Image structure describing the buffer
 * @param[in] x   Relative X position
 * @param[in] y   Relative Y position
 *
 * @return Pointer located x * stride_x + y * stride_y bytes away from the image's current pointer
 */
inline __global uchar *offset(const Image *img, int x, int y)
{
    __global uchar *element = img->ptr;
    // Apply each dimension separately, one pointer addition per term.
    element += x * img->stride_x;
    element += y * img->stride_y;
    return element;
}
865 
/** Compute the address of an element of a Tensor3D relative to its current pointer.
 *
 * @param[in] tensor Pointer to the Tensor3D structure describing the buffer
 * @param[in] x      Relative X position
 * @param[in] y      Relative Y position
 * @param[in] z      Relative Z position
 *
 * @return Pointer located x * stride_x + y * stride_y + z * stride_z bytes away from the tensor's current pointer
 */
inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
{
    __global const uchar *element = tensor->ptr;
    // Apply each dimension separately, one pointer addition per term.
    element += x * tensor->stride_x;
    element += y * tensor->stride_y;
    element += z * tensor->stride_z;
    return element;
}
877 
/** Compute the address of an element of a Tensor4D relative to its current pointer.
 *
 * @param[in] tensor Pointer to the Tensor4D structure describing the buffer
 * @param[in] x      Relative X position
 * @param[in] y      Relative Y position
 * @param[in] z      Relative Z position
 * @param[in] w      Relative W position
 *
 * @return Pointer located x * stride_x + y * stride_y + z * stride_z + w * stride_w bytes away from the tensor's current pointer
 */
inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
{
    __global const uchar *element = tensor->ptr;
    // Apply each dimension separately, one pointer addition per term.
    element += x * tensor->stride_x;
    element += y * tensor->stride_y;
    element += z * tensor->stride_z;
    element += w * tensor->stride_w;
    return element;
}
890 
/** Convert a linear element index into a pointer inside a Tensor3D.
 *
 * The index is decomposed as index = z * (width * height) + y * width + x, and the resulting
 * coordinates are scaled by the tensor strides. The tensor's first-element offset is added as well.
 *
 * @param[in] tensor Pointer to the Tensor3D structure describing the buffer
 * @param[in] width  Width of the input tensor
 * @param[in] height Height of the input tensor
 * @param[in] depth  Depth of the input tensor. Not used by this function.
 * @param[in] index  Linear index
 *
 * @return Pointer to the element addressed by @p index
 */
inline __global const uchar *tensor3D_index2ptr(const Tensor3D *tensor, uint width, uint height, uint depth, uint index)
{
    // Number of elements in one XY plane.
    const uint plane_size = width * height;

    // Peel off the Z coordinate first, then split the remainder into Y and X.
    const uint z        = index / plane_size;
    const uint in_plane = index % plane_size;
    const uint y        = in_plane / width;
    const uint x        = in_plane % width;

    __global const uchar *element = tensor->ptr;
    element += x * tensor->stride_x;
    element += y * tensor->stride_y;
    element += z * tensor->stride_z;
    element += tensor->offset_first_element_in_bytes;
    return element;
}
915 
916 #endif // ARM_COMPUTE_HELPER_H
Structure to hold Vector information.
Definition: helpers.h:666
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:861
SimpleTensor< float > w
Definition: DFT.cpp:156
Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Wrap image information into an Image structure, and make the pointer point at this workitem's data...
Definition: helpers.h:735
Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into a tensor structure, and make the pointer point at this workitem's data.
Definition: helpers.h:787
__global uchar * ptr
Pointer to the starting position of the buffer.
Definition: helpers.h:695
int stride_z
Stride of the image in Z dimension (in bytes)
Definition: helpers.h:689
struct Image Image
Structure to hold Image information.
struct Tensor3D Tensor3D
Structure to hold 3D tensor information.
__global const uchar * tensor3D_index2ptr(const Tensor3D *tensor, uint width, uint height, uint depth, uint index)
Get the offset for a given linear index of a Tensor3D.
Definition: helpers.h:899
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:687
struct Tensor4D Tensor4D
Structure to hold 4D tensor information.
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:677
Structure to hold 3D tensor information.
Definition: helpers.h:683
Structure to hold 4D tensor information.
Definition: helpers.h:693
int stride_w
Stride of the image in W dimension (in bytes)
Definition: helpers.h:700
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
Get the pointer position of a Tensor4D.
Definition: helpers.h:886
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:670
__global uchar * ptr
Pointer to the starting position of the buffer.
Definition: helpers.h:668
Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
Definition: helpers.h:761
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:698
Structure to hold Image information.
Definition: helpers.h:674
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:669
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:696
__global uchar * ptr
Pointer to the starting position of the buffer.
Definition: helpers.h:676
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
Definition: helpers.h:850
struct Vector Vector
Structure to hold Vector information.
Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Wrap vector information into a Vector structure, and make the pointer point at this workitem's data...
Definition: helpers.h:712
__global uchar * ptr
Pointer to the starting position of the buffer.
Definition: helpers.h:685
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:697
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:688
int stride_z
Stride of the image in Z dimension (in bytes)
Definition: helpers.h:699
int offset_first_element_in_bytes
The offset of the first element in the source image.
Definition: helpers.h:686
Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)
Definition: helpers.h:827
int stride_y
Stride of the image in Y dimension (in bytes)
Definition: helpers.h:679
Tensor3D tensor3D_ptr_no_update(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Wrap 3D tensor information into a tensor structure.
Definition: helpers.h:814
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.
Definition: helpers.h:873
int stride_x
Stride of the image in X dimension (in bytes)
Definition: helpers.h:678