#include "load_store_utility.h"

Data Structures
struct	Vector
	Structure to hold Vector information. More...

struct	Image
	Structure to hold Image information. More...

struct	Tensor3D
	Structure to hold 3D tensor information. More...

struct	Tensor4D
	Structure to hold 4D tensor information. More...

Macros

#define GPU_ARCH_MIDGARD 0x100

#define GPU_ARCH_BIFROST 0x200

#define CONCAT(a, b) a##b

Concatenate two inputs. More...

#define EXPAND(x) x

Expand the given vector. More...

#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)

Clamp the given value between an upper and lower bound. More...

#define VLOAD_STR(size) vload##size

#define VLOAD(size) VLOAD_STR(size)

#define PIXEL_UNIT4 1

#define PIXEL_UNIT8 2

#define PIXEL_UNIT16 4

#define read_image2d_floatx1(img, x_coord, y_coord) (float4)(read_imagef(img, (int2)(x_coord, y_coord)));

#define read_image2d_floatx2(img, x_coord, y_coord) (float8)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)));

#define read_image2d_floatx4(img, x_coord, y_coord) (float16)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)), read_imagef(img, (int2)(x_coord + 2, y_coord)), read_imagef(img, (int2)(x_coord + 3, y_coord)));

#define convert_float_sat convert_float

#define convert_float1_sat convert_float

#define convert_float2_sat convert_float2

#define convert_float3_sat convert_float3

#define convert_float4_sat convert_float4

#define convert_float8_sat convert_float8

#define convert_float16_sat convert_float16

#define convert_half_sat convert_float

#define convert_half1_sat convert_half

#define convert_half2_sat convert_half2

#define convert_half3_sat convert_half3

#define convert_half4_sat convert_half4

#define convert_half8_sat convert_half8

#define convert_half16_sat convert_half16

#define convert_float1 convert_float

#define convert_half1 convert_half

#define convert_char1 convert_char

#define convert_uchar1 convert_uchar

#define convert_short1 convert_short

#define convert_ushort1 convert_ushort

#define convert_int1 convert_int

#define convert_uint1 convert_uint

#define convert_long1 convert_long

#define convert_ulong1 convert_ulong

#define convert_double1 convert_double

#define convert_char1_sat convert_char_sat

#define convert_uchar1_sat convert_uchar_sat

#define convert_uchar2_sat convert_uchar2_sat

#define convert_uchar3_sat convert_uchar3_sat

#define convert_uchar4_sat convert_uchar4_sat

#define convert_uchar8_sat convert_uchar8_sat

#define convert_uchar16_sat convert_uchar16_sat

#define convert_short1_sat convert_short_sat

#define convert_ushort1_sat convert_ushort_sat

#define convert_int1_sat convert_int_sat

#define convert_uint1_sat convert_uint_sat

#define convert_long1_sat convert_long_sat

#define convert_ulong1_sat convert_ulong_sat

#define convert_double1_sat convert_double_sat

#define VEC_DATA_TYPE_STR(type, size) type##size

#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)

#define CONVERT_STR(x, type) (convert_##type((x)))

#define CONVERT(x, type) CONVERT_STR(x, type)

#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))

#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)

#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))

#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)

#define select_vec_dt_uchar(size) uchar##size

#define select_vec_dt_char(size) char##size

#define select_vec_dt_ushort(size) ushort##size

#define select_vec_dt_short(size) short##size

#define select_vec_dt_half(size) short##size

#define select_vec_dt_uint(size) uint##size

#define select_vec_dt_int(size) int##size

#define select_vec_dt_float(size) int##size

#define select_vec_dt_ulong(size) ulong##size

#define select_vec_dt_long(size) long##size

#define SELECT_VEC_DATA_TYPE_STR(type, size) select_vec_dt_##type(size)

#define SELECT_VEC_DATA_TYPE(type, size) SELECT_VEC_DATA_TYPE_STR(type, size)

#define SELECT_DATA_TYPE(type) SELECT_VEC_DATA_TYPE_STR(type, 1)

#define signed_int_vec_dt_uchar(size) char##size

#define signed_int_vec_dt_char(size) char##size

#define signed_int_vec_dt_ushort(size) short##size

#define signed_int_vec_dt_short(size) short##size

#define signed_int_vec_dt_half(size) short##size

#define signed_int_vec_dt_uint(size) int##size

#define signed_int_vec_dt_int(size) int##size

#define signed_int_vec_dt_float(size) int##size

#define signed_int_vec_dt_ulong(size) long##size

#define signed_int_vec_dt_long(size) long##size

#define SIGNED_INT_VEC_DATA_TYPE_STR(type, size) signed_int_vec_dt_##type(size)

#define SIGNED_INT_VEC_DATA_TYPE(type, size) SIGNED_INT_VEC_DATA_TYPE_STR(type, size)

#define SIGNED_INT_DATA_TYPE(type) SIGNED_INT_VEC_DATA_TYPE_STR(type, 1)

#define sum_reduce_1(x) (x)

#define sum_reduce_2(x) ((x).s0) + ((x).s1)

#define sum_reduce_3(x) sum_reduce_2((x).s01) + ((x).s2)

#define sum_reduce_4(x) sum_reduce_2((x).s01) + sum_reduce_2((x).s23)

#define sum_reduce_8(x) sum_reduce_4((x).s0123) + sum_reduce_4((x).s4567)

#define sum_reduce_16(x) sum_reduce_8((x).s01234567) + sum_reduce_8((x).s89ABCDEF)

#define SUM_REDUCE_STR(x, size) sum_reduce_##size(x)

#define SUM_REDUCE(x, size) SUM_REDUCE_STR(x, size)

#define prod_reduce_1(x) (x)

#define prod_reduce_2(x) ((x).s0) * ((x).s1)

#define prod_reduce_3(x) prod_reduce_2((x).s01) * ((x).s2)

#define prod_reduce_4(x) prod_reduce_2((x).s01) * prod_reduce_2((x).s23)

#define prod_reduce_8(x) prod_reduce_4((x).s0123) * prod_reduce_4((x).s4567)

#define prod_reduce_16(x) prod_reduce_8((x).s01234567) * prod_reduce_8((x).s89ABCDEF)

#define PROD_REDUCE_STR(x, size) prod_reduce_##size(x)

#define PROD_REDUCE(x, size) PROD_REDUCE_STR(x, size)

#define max_reduce_1(x) (x)

#define max_reduce_2(x) max(((x).s0), ((x).s1))

#define max_reduce_3(x) max(max_reduce_2((x).s01), ((x).s2))

#define max_reduce_4(x) max(max_reduce_2((x).s01), max_reduce_2((x).s23))

#define max_reduce_8(x) max(max_reduce_4((x).s0123), max_reduce_4((x).s4567))

#define max_reduce_16(x) max(max_reduce_8((x).s01234567), max_reduce_8((x).s89ABCDEF))

#define MAX_REDUCE_STR(x, size) max_reduce_##size(x)

#define MAX_REDUCE(x, size) MAX_REDUCE_STR(x, size)

#define VECTOR_DECLARATION(name)

#define IMAGE_DECLARATION(name)

#define TENSOR3D_DECLARATION(name)

#define TENSOR4D_DECLARATION(name)

#define CONVERT_TO_VECTOR_STRUCT(name) update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)

#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)

#define CONVERT_TO_IMAGE_STRUCT(name) update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)

#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)

#define CONVERT_TO_TENSOR3D_STRUCT(name)

#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)

#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size)

#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)

#define CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR(name)

REVn

REVn reverses the given vector whose size is n.

Parameters

[in] x The vector to be reversed

Returns: The reversed vector

#define REV1(x) ((x))

#define REV2(x) ((x).s10)

#define REV3(x) ((x).s210)

#define REV4(x) ((x).s3210)

#define REV8(x) ((x).s76543210)

#define REV16(x) ((x).sFEDCBA9876543210)

REVERSE

Reverse the given vector.

Parameters

[in]	x	The vector to be reversed
[in]	s	The size of the vector

Returns: The reversed vector

#define REVERSE_STR(x, s) REV##s((x))

#define REVERSE(x, s) REVERSE_STR(x, s)

ROTs_n

Circular-right-shift (rotate-right) the vector of size s by n element positions.

Parameters

[in] x The vector to be shifted

Returns: The shifted vector

#define ROT1_0(x) ((x))

#define ROT1_1(x) ((x))

#define ROT2_0(x) ((x))

#define ROT2_1(x) ((x).s10)

#define ROT2_2(x) ((x))

#define ROT3_0(x) ((x))

#define ROT3_1(x) ((x).s201)

#define ROT3_2(x) ((x).s120)

#define ROT3_3(x) ((x))

#define ROT4_0(x) ((x))

#define ROT4_1(x) ((x).s3012)

#define ROT4_2(x) ((x).s2301)

#define ROT4_3(x) ((x).s1230)

#define ROT4_4(x) ((x))

#define ROT8_0(x) ((x))

#define ROT8_1(x) ((x).s70123456)

#define ROT8_2(x) ((x).s67012345)

#define ROT8_3(x) ((x).s56701234)

#define ROT8_4(x) ((x).s45670123)

#define ROT8_5(x) ((x).s34567012)

#define ROT8_6(x) ((x).s23456701)

#define ROT8_7(x) ((x).s12345670)

#define ROT8_8(x) ((x))

#define ROT16_0(x) ((x))

#define ROT16_1(x) ((x).sF0123456789ABCDE)

#define ROT16_2(x) ((x).sEF0123456789ABCD)

#define ROT16_3(x) ((x).sDEF0123456789ABC)

#define ROT16_4(x) ((x).sCDEF0123456789AB)

#define ROT16_5(x) ((x).sBCDEF0123456789A)

#define ROT16_6(x) ((x).sABCDEF0123456789)

#define ROT16_7(x) ((x).s9ABCDEF012345678)

#define ROT16_8(x) ((x).s89ABCDEF01234567)

#define ROT16_9(x) ((x).s789ABCDEF0123456)

#define ROT16_10(x) ((x).s6789ABCDEF012345)

#define ROT16_11(x) ((x).s56789ABCDEF01234)

#define ROT16_12(x) ((x).s456789ABCDEF0123)

#define ROT16_13(x) ((x).s3456789ABCDEF012)

#define ROT16_14(x) ((x).s23456789ABCDEF01)

#define ROT16_15(x) ((x).s123456789ABCDEF0)

#define ROT16_16(x) ((x))

ROTATE

Circular-right-shift (rotate-right) the given vector by the given amount.

Parameters

[in]	x	The vector to be shifted
[in]	s	The size of the vector
[in]	n	The amount to be shifted

Returns: The shifted vector

#define ROTATE_STR(x, s, n) ROT##s##_##n(x)

#define ROTATE(x, s, n) ROTATE_STR(x, s, n)

V_OFFSn

Creates a vector of size n filled with offset values corresponding to the location of each element.

Parameters

[in] dt The data type of the output vector

Returns: The vector filled with offset values

#define V_OFFS1(dt) (dt##1)(0)

#define V_OFFS2(dt) (dt##2)(0, 1)

#define V_OFFS3(dt) (dt##3)(0, 1, 2)

#define V_OFFS4(dt) (dt##4)(0, 1, 2, 3)

#define V_OFFS8(dt) (dt##8)(0, 1, 2, 3, 4, 5, 6, 7)

#define V_OFFS16(dt) (dt##16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

VEC_OFFS

Create a vector filled with offset values corresponding to the location of each element.

Parameters

[in]	dt	The data type of the output vector
[in]	s	The size of the output vector

Returns: The vector filled with offset values

#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)

#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)

CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT

Utility macro to convert a vector size to pixel units.

Parameters

[in] vec_size Vector size. Only 4, 8 and 16 are supported

Returns: The pixel unit (number of pixels)

#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size) PIXEL_UNIT##vec_size

#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(vec_size) CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size)

#define READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord) read_image2d_##data_type##x##n0(img, x_coord, y_coord)

Utility macro to read a 2D OpenCL image object. More...

#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord) READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)

#define VSTORE_STR(size) vstore##size

#define VSTORE(size) VSTORE_STR(size)

#define float1 float

#define half1 half

#define char1 char

#define uchar1 uchar

#define short1 short

#define ushort1 ushort

#define int1 int

#define uint1 uint

#define long1 long

#define ulong1 ulong

#define double1 double

#define vload1(OFFSET, PTR) *(OFFSET + PTR)

#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA

VSTORE_PARTIAL

Extended partial vstore that correctly handles scalar values as well.

Store the lowest store_size elements (elements 0 to store_size-1) of the given vector while minimising the number of vstore operations.

Note: With this macro, the passed data can be either a vector or a scalar. store_size needs to be <= size. Example 1 (valid): VSTORE_PARTIAL(16, 15) ... Example 2 (invalid): VSTORE_PARTIAL(4, 7) ...

Parameters

[in]	size	The width of `DATA`. Supported values: 1(scalar), 2, 3, 4, 8, 16
[in]	store_size	The number of lower elements to store. Supported values: 1-16, but has to be <= `size`

#define VSTORE_PARTIAL_STR(size, store_size) vstore_partial_##size##_##store_size

#define VSTORE_PARTIAL(size, store_size) VSTORE_PARTIAL_STR(size, store_size)

#define NO_STORE(data, offs, ptr)

#define vstore_partial_1_0 NO_STORE

#define vstore_partial_1_1 vstore1

#define vstore_partial_1_2 NO_STORE

#define vstore_partial_1_3 NO_STORE

#define vstore_partial_1_4 NO_STORE

#define vstore_partial_1_5 NO_STORE

#define vstore_partial_1_6 NO_STORE

#define vstore_partial_1_7 NO_STORE

#define vstore_partial_1_8 NO_STORE

#define vstore_partial_1_9 NO_STORE

#define vstore_partial_1_10 NO_STORE

#define vstore_partial_1_11 NO_STORE

#define vstore_partial_1_12 NO_STORE

#define vstore_partial_1_13 NO_STORE

#define vstore_partial_1_14 NO_STORE

#define vstore_partial_1_15 NO_STORE

#define vstore_partial_1_16 NO_STORE

#define vstore_partial_2_0 NO_STORE

#define vstore_partial_2_1 vstore_partial_1

#define vstore_partial_2_2 vstore_partial_2

#define vstore_partial_2_3 NO_STORE

#define vstore_partial_2_4 NO_STORE

#define vstore_partial_2_5 NO_STORE

#define vstore_partial_2_6 NO_STORE

#define vstore_partial_2_7 NO_STORE

#define vstore_partial_2_8 NO_STORE

#define vstore_partial_2_9 NO_STORE

#define vstore_partial_2_10 NO_STORE

#define vstore_partial_2_11 NO_STORE

#define vstore_partial_2_12 NO_STORE

#define vstore_partial_2_13 NO_STORE

#define vstore_partial_2_14 NO_STORE

#define vstore_partial_2_15 NO_STORE

#define vstore_partial_2_16 NO_STORE

#define vstore_partial_3_0 NO_STORE

#define vstore_partial_3_1 vstore_partial_1

#define vstore_partial_3_2 vstore_partial_2

#define vstore_partial_3_3 vstore_partial_3

#define vstore_partial_3_4 NO_STORE

#define vstore_partial_3_5 NO_STORE

#define vstore_partial_3_6 NO_STORE

#define vstore_partial_3_7 NO_STORE

#define vstore_partial_3_8 NO_STORE

#define vstore_partial_3_9 NO_STORE

#define vstore_partial_3_10 NO_STORE

#define vstore_partial_3_11 NO_STORE

#define vstore_partial_3_12 NO_STORE

#define vstore_partial_3_13 NO_STORE

#define vstore_partial_3_14 NO_STORE

#define vstore_partial_3_15 NO_STORE

#define vstore_partial_3_16 NO_STORE

#define vstore_partial_4_0 NO_STORE

#define vstore_partial_4_1 vstore_partial_1

#define vstore_partial_4_2 vstore_partial_2

#define vstore_partial_4_3 vstore_partial_3

#define vstore_partial_4_4 vstore_partial_4

#define vstore_partial_4_5 NO_STORE

#define vstore_partial_4_6 NO_STORE

#define vstore_partial_4_7 NO_STORE

#define vstore_partial_4_8 NO_STORE

#define vstore_partial_4_9 NO_STORE

#define vstore_partial_4_10 NO_STORE

#define vstore_partial_4_11 NO_STORE

#define vstore_partial_4_12 NO_STORE

#define vstore_partial_4_13 NO_STORE

#define vstore_partial_4_14 NO_STORE

#define vstore_partial_4_15 NO_STORE

#define vstore_partial_4_16 NO_STORE

#define vstore_partial_8_0 NO_STORE

#define vstore_partial_8_1 vstore_partial_1

#define vstore_partial_8_2 vstore_partial_2

#define vstore_partial_8_3 vstore_partial_3

#define vstore_partial_8_4 vstore_partial_4

#define vstore_partial_8_5 vstore_partial_5

#define vstore_partial_8_6 vstore_partial_6

#define vstore_partial_8_7 vstore_partial_7

#define vstore_partial_8_8 vstore_partial_8

#define vstore_partial_8_9 NO_STORE

#define vstore_partial_8_10 NO_STORE

#define vstore_partial_8_11 NO_STORE

#define vstore_partial_8_12 NO_STORE

#define vstore_partial_8_13 NO_STORE

#define vstore_partial_8_14 NO_STORE

#define vstore_partial_8_15 NO_STORE

#define vstore_partial_8_16 NO_STORE

#define vstore_partial_16_0 NO_STORE

#define vstore_partial_16_1 vstore_partial_1

#define vstore_partial_16_2 vstore_partial_2

#define vstore_partial_16_3 vstore_partial_3

#define vstore_partial_16_4 vstore_partial_4

#define vstore_partial_16_5 vstore_partial_5

#define vstore_partial_16_6 vstore_partial_6

#define vstore_partial_16_7 vstore_partial_7

#define vstore_partial_16_8 vstore_partial_8

#define vstore_partial_16_9 vstore_partial_9

#define vstore_partial_16_10 vstore_partial_10

#define vstore_partial_16_11 vstore_partial_11

#define vstore_partial_16_12 vstore_partial_12

#define vstore_partial_16_13 vstore_partial_13

#define vstore_partial_16_14 vstore_partial_14

#define vstore_partial_16_15 vstore_partial_15

#define vstore_partial_16_16 vstore_partial_16

vstore_partial_n

Partial vstore.

Store the lowest n elements (elements 0 to n-1) of the given vector while minimising the number of vstore operations.

Note: DATA needs to be a vector, not a scalar. n needs to be <= the vector width of the input variable DATA. Example 1 (valid): vstore_partial_15(var:float16, 0, 0xabcd); Example 2 (invalid): vstore_partial_7(var:float4, 0, 0xabcd); When n is 1, 2, 3, 4, 8 or 16, no extra vstore is invoked, so there is no performance penalty.

Parameters

[in]	DATA	The name of the variable
[in]	OFFSET	Offset in n
[in]	PTR	The base pointer

#define vstore_partial_1(DATA, OFFSET, PTR) vstore1(DATA.s0, OFFSET, PTR);

#define vstore_partial_2(DATA, OFFSET, PTR) vstore2(DATA.s01, OFFSET, PTR);

#define vstore_partial_3(DATA, OFFSET, PTR) vstore3(DATA.s012, OFFSET, PTR);

#define vstore_partial_4(DATA, OFFSET, PTR) vstore4(DATA.s0123, OFFSET, PTR);

#define vstore_partial_5(DATA, OFFSET, PTR)

#define vstore_partial_6(DATA, OFFSET, PTR)

#define vstore_partial_7(DATA, OFFSET, PTR)

#define vstore_partial_8(DATA, OFFSET, PTR) vstore8(DATA.s01234567, OFFSET, PTR);

#define vstore_partial_9(DATA, OFFSET, PTR)

#define vstore_partial_10(DATA, OFFSET, PTR)

#define vstore_partial_11(DATA, OFFSET, PTR)

#define vstore_partial_12(DATA, OFFSET, PTR)

#define vstore_partial_13(DATA, OFFSET, PTR)

#define vstore_partial_14(DATA, OFFSET, PTR)

#define vstore_partial_15(DATA, OFFSET, PTR)

#define vstore_partial_16(DATA, OFFSET, PTR) vstore16(DATA, OFFSET, PTR);

Typedefs
typedef struct Vector	Vector
	Structure to hold Vector information. More...

typedef struct Image	Image
	Structure to hold Image information. More...

typedef struct Tensor3D	Tensor3D
	Structure to hold 3D tensor information. More...

typedef struct Tensor4D	Tensor4D
	Structure to hold 4D tensor information. More...

Functions
Vector	update_vector_workitem_ptr (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
	Wrap vector information into a Vector structure, and make the pointer point at this workitem's data. More...

Image	update_image_workitem_ptr (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
	Wrap image information into an Image structure, and make the pointer point at this workitem's data. More...

Image	update_image_from_tensor3D_workitem_ptr (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
	Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data. More...

Tensor3D	update_tensor3D_workitem_ptr (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
	Wrap 3D tensor information into a tensor structure, and make the pointer point at this workitem's data. More...

Tensor3D	tensor3D_ptr_no_update (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
	Wrap 3D tensor information into a tensor structure. More...

Tensor4D	update_tensor4D_workitem_ptr (__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)

__global const uchar *	vector_offset (const Vector *vec, int x)
	Get the pointer position of a Vector. More...

__global uchar *	offset (const Image *img, int x, int y)
	Get the pointer position of an Image. More...

__global const uchar *	tensor3D_offset (const Tensor3D *tensor, int x, int y, int z)
	Get the pointer position of a Tensor3D. More...

__global const uchar *	tensor4D_offset (const Tensor4D *tensor, int x, int y, int z, int w)
	Get the pointer position of a Tensor4D. More...

__global const uchar *	tensor3D_index2ptr (const Tensor3D *tensor, uint width, uint height, uint depth, uint index)
	Get the offset for a given linear index of a Tensor3D. More...

Macro Definition Documentation

◆ char1

#define char1 char

Definition at line 253 of file helpers.h.

◆ CLAMP

#define CLAMP	(	x,
		min_val,
		max_val
	)	min(max(x, min_val), max_val)

Clamp the given value between an upper and lower bound.

Parameters

[in]	x	The value to be clamped
[in]	min_val	The lower bound
[in]	max_val	The upper bound

Returns: The clamped value.

Definition at line 73 of file helpers.h.

◆ CONCAT

#define CONCAT	(	a,
		b
	)	a##b

Concatenate two inputs.

Parameters

[in]	a	The first input to be concatenated
[in]	b	The second input to be concatenated

Returns: The concatenated output

Definition at line 55 of file helpers.h.

◆ CONVERT

#define CONVERT	(	x,
		type
	)	CONVERT_STR(x, type)

Definition at line 522 of file helpers.h.

Referenced by bilinear_interpolate_with_border(), get_invsqrt_quantized_multiplier_exp(), and pooling_layer_3().

◆ convert_char1

#define convert_char1 convert_char

Definition at line 493 of file helpers.h.

◆ convert_char1_sat

#define convert_char1_sat convert_char_sat

Definition at line 503 of file helpers.h.

◆ convert_double1

#define convert_double1 convert_double

Definition at line 501 of file helpers.h.

◆ convert_double1_sat

#define convert_double1_sat convert_double_sat

Definition at line 516 of file helpers.h.

◆ convert_float1

#define convert_float1 convert_float

Definition at line 491 of file helpers.h.

◆ convert_float16_sat

#define convert_float16_sat convert_float16

Definition at line 482 of file helpers.h.

◆ convert_float1_sat

#define convert_float1_sat convert_float

Definition at line 477 of file helpers.h.

◆ convert_float2_sat

#define convert_float2_sat convert_float2

Definition at line 478 of file helpers.h.

◆ convert_float3_sat

#define convert_float3_sat convert_float3

Definition at line 479 of file helpers.h.

◆ convert_float4_sat

#define convert_float4_sat convert_float4

Definition at line 480 of file helpers.h.

◆ convert_float8_sat

#define convert_float8_sat convert_float8

Definition at line 481 of file helpers.h.

◆ convert_float_sat

#define convert_float_sat convert_float

Definition at line 476 of file helpers.h.

◆ convert_half1

#define convert_half1 convert_half

Definition at line 492 of file helpers.h.

◆ convert_half16_sat

#define convert_half16_sat convert_half16

Definition at line 489 of file helpers.h.

◆ convert_half1_sat

#define convert_half1_sat convert_half

Definition at line 484 of file helpers.h.

◆ convert_half2_sat

#define convert_half2_sat convert_half2

Definition at line 485 of file helpers.h.

◆ convert_half3_sat

#define convert_half3_sat convert_half3

Definition at line 486 of file helpers.h.

◆ convert_half4_sat

#define convert_half4_sat convert_half4

Definition at line 487 of file helpers.h.

◆ convert_half8_sat

#define convert_half8_sat convert_half8

Definition at line 488 of file helpers.h.

◆ convert_half_sat

#define convert_half_sat convert_float

Definition at line 483 of file helpers.h.

◆ convert_int1

#define convert_int1 convert_int

Definition at line 497 of file helpers.h.

◆ convert_int1_sat

#define convert_int1_sat convert_int_sat

Definition at line 512 of file helpers.h.

◆ convert_long1

#define convert_long1 convert_long

Definition at line 499 of file helpers.h.

◆ convert_long1_sat

#define convert_long1_sat convert_long_sat

Definition at line 514 of file helpers.h.

◆ CONVERT_SAT

#define CONVERT_SAT	(	x,
		type
	)	CONVERT_SAT_STR(x, type)

Definition at line 525 of file helpers.h.

Referenced by bilinear_interpolate_with_border_quantized(), and quantize_qasymm8().

◆ CONVERT_SAT_ROUND

#define CONVERT_SAT_ROUND	(	x,
		type,
		round
	)	CONVERT_SAT_ROUND_STR(x, type, round)

Definition at line 528 of file helpers.h.

◆ CONVERT_SAT_ROUND_STR

#define CONVERT_SAT_ROUND_STR	(	x,
		type,
		round
	)	(convert_##type##_sat_##round((x)))

Definition at line 527 of file helpers.h.

◆ CONVERT_SAT_STR

#define CONVERT_SAT_STR	(	x,
		type
	)	(convert_##type##_sat((x)))

Definition at line 524 of file helpers.h.

◆ convert_short1

#define convert_short1 convert_short

Definition at line 495 of file helpers.h.

◆ convert_short1_sat

#define convert_short1_sat convert_short_sat

Definition at line 510 of file helpers.h.

◆ CONVERT_STR

#define CONVERT_STR	(	x,
		type
	)	(convert_##type((x)))

Definition at line 521 of file helpers.h.

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT [1/2]

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT ( name ) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)

Definition at line 644 of file helpers.h.

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT [2/2]

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT ( name ) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)

Definition at line 644 of file helpers.h.

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP

#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP ( name ) update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)

Definition at line 641 of file helpers.h.

Referenced by fill_image_borders_constant(), and fill_image_borders_replicate().

◆ CONVERT_TO_IMAGE_STRUCT

#define CONVERT_TO_IMAGE_STRUCT ( name ) update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)

Definition at line 632 of file helpers.h.

Referenced by get_invsqrt_quantized_multiplier_exp(), non_max_suppression(), remap_bilinear_nchw(), remap_nearest_neighbour_nchw(), scale_bilinear_nchw(), scale_bilinear_quantized_nchw(), and scale_nearest_neighbour_nchw().

◆ CONVERT_TO_IMAGE_STRUCT_NO_STEP

#define CONVERT_TO_IMAGE_STRUCT_NO_STEP ( name ) update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)

Definition at line 635 of file helpers.h.

Referenced by remap_bilinear_nchw(), remap_nearest_neighbour_nchw(), roi_pooling_layer(), scale_bilinear_nchw(), scale_bilinear_quantized_nchw(), and scale_nearest_neighbour_nchw().

◆ CONVERT_TO_TENSOR3D_STRUCT

#define CONVERT_TO_TENSOR3D_STRUCT ( name )

Value:

update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \

name##_stride_z, name##_step_z)

update_tensor3D_workitem_ptr

Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)

Wrap 3D tensor information into a tensor structure, and make the pointer point at this workitem's data.

Definition: helpers.h:787

name

const char * name

Definition: CpuActivationKernel.cpp:57

Definition at line 647 of file helpers.h.

Referenced by calculate_avg_scale(), deconvolution_upsample(), max_unpooling_layer_2(), pooling_layer_2(), pooling_layer_2_nchw_indices_fp16(), pooling_layer_2_nchw_indices_fp32(), pooling_layer_3(), reshape_layer(), upsample_layer_nchw(), and upsample_layer_nhwc().

◆ CONVERT_TO_TENSOR3D_STRUCT_NO_STEP

#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP ( name ) update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)

Definition at line 651 of file helpers.h.

Referenced by deconvolution_upsample(), reshape_layer(), and roi_pooling_layer().

◆ CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR

#define CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR ( name )

Value:

tensor3D_ptr_no_update(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \

name##_stride_z, name##_step_z)

name

const char * name

Definition: CpuActivationKernel.cpp:57

tensor3D_ptr_no_update

Tensor3D tensor3D_ptr_no_update(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)

Wrap 3D tensor information into a tensor structure.

Definition: helpers.h:814

Definition at line 661 of file helpers.h.

Referenced by max_unpooling_layer_2().

◆ CONVERT_TO_TENSOR4D_STRUCT

#define CONVERT_TO_TENSOR4D_STRUCT	(	name,
		mod_size
	)

Value:

update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \

name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)

name

const char * name

Definition: CpuActivationKernel.cpp:57

update_tensor4D_workitem_ptr

Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)

Definition: helpers.h:827

Definition at line 654 of file helpers.h.

Referenced by strided_slice().

◆ CONVERT_TO_TENSOR4D_STRUCT_NO_STEP

#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP	(	name,
		mod_size
	)	update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)

Definition at line 658 of file helpers.h.

Referenced by strided_slice().

◆ CONVERT_TO_VECTOR_STRUCT

#define CONVERT_TO_VECTOR_STRUCT ( name ) update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)

Definition at line 626 of file helpers.h.

Referenced by get_invsqrt_quantized_multiplier_exp().

◆ CONVERT_TO_VECTOR_STRUCT_NO_STEP

#define CONVERT_TO_VECTOR_STRUCT_NO_STEP ( name ) update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)

Definition at line 629 of file helpers.h.

Referenced by deconvolution_upsample().

◆ convert_uchar1

#define convert_uchar1 convert_uchar

Definition at line 494 of file helpers.h.

◆ convert_uchar16_sat

#define convert_uchar16_sat convert_uchar16_sat

Definition at line 509 of file helpers.h.

◆ convert_uchar1_sat

#define convert_uchar1_sat convert_uchar_sat

Definition at line 504 of file helpers.h.

◆ convert_uchar2_sat

#define convert_uchar2_sat convert_uchar2_sat

Definition at line 505 of file helpers.h.

◆ convert_uchar3_sat

#define convert_uchar3_sat convert_uchar3_sat

Definition at line 506 of file helpers.h.

◆ convert_uchar4_sat

#define convert_uchar4_sat convert_uchar4_sat

Definition at line 507 of file helpers.h.

◆ convert_uchar8_sat

#define convert_uchar8_sat convert_uchar8_sat

Definition at line 508 of file helpers.h.

◆ convert_uint1

#define convert_uint1 convert_uint

Definition at line 498 of file helpers.h.

◆ convert_uint1_sat

#define convert_uint1_sat convert_uint_sat

Definition at line 513 of file helpers.h.

◆ convert_ulong1

#define convert_ulong1 convert_ulong

Definition at line 500 of file helpers.h.

◆ convert_ulong1_sat

#define convert_ulong1_sat convert_ulong_sat

Definition at line 515 of file helpers.h.

◆ convert_ushort1

#define convert_ushort1 convert_ushort

Definition at line 496 of file helpers.h.

◆ convert_ushort1_sat

#define convert_ushort1_sat convert_ushort_sat

Definition at line 511 of file helpers.h.

◆ CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT

#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT ( vec_size ) CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size)

Definition at line 219 of file helpers.h.

◆ CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR

#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR ( vec_size ) PIXEL_UNIT##vec_size

Definition at line 218 of file helpers.h.

◆ double1

#define double1 double

Definition at line 261 of file helpers.h.

◆ EXPAND

#define EXPAND ( x ) x

Expand the given vector.

Parameters

[in] x The vector to be expanded

Returns: The expanded output

Definition at line 63 of file helpers.h.

◆ float1

#define float1 float

Definition at line 251 of file helpers.h.

◆ GPU_ARCH_BIFROST

#define GPU_ARCH_BIFROST 0x200

Definition at line 46 of file helpers.h.

◆ GPU_ARCH_MIDGARD

#define GPU_ARCH_MIDGARD 0x100

Definition at line 45 of file helpers.h.

◆ half1

#define half1 half

Definition at line 252 of file helpers.h.

◆ IMAGE_DECLARATION

#define IMAGE_DECLARATION ( name )

Value:

__global uchar *name##_ptr,      \
    uint        name##_stride_x, \
    uint        name##_step_x,   \
    uint        name##_stride_y, \
    uint        name##_step_y,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 596 of file helpers.h.

Referenced by get_invsqrt_quantized_multiplier_exp().

◆ int1

#define int1 int

Definition at line 257 of file helpers.h.

◆ long1

#define long1 long

Definition at line 259 of file helpers.h.

◆ MAX_REDUCE

#define MAX_REDUCE	(	x,
		size
	)	MAX_REDUCE_STR(x, size)

Definition at line 588 of file helpers.h.

◆ max_reduce_1

#define max_reduce_1 ( x ) (x)

Definition at line 580 of file helpers.h.

◆ max_reduce_16

#define max_reduce_16 ( x ) max(max_reduce_8((x).s01234567), max_reduce_8((x).s89ABCDEF))

Definition at line 585 of file helpers.h.

◆ max_reduce_2

#define max_reduce_2 ( x ) max(((x).s0), ((x).s1))

Definition at line 581 of file helpers.h.

◆ max_reduce_3

#define max_reduce_3 ( x ) max(max_reduce_2((x).s01), ((x).s2))

Definition at line 582 of file helpers.h.

◆ max_reduce_4

#define max_reduce_4 ( x ) max(max_reduce_2((x).s01), max_reduce_2((x).s23))

Definition at line 583 of file helpers.h.

◆ max_reduce_8

#define max_reduce_8 ( x ) max(max_reduce_4((x).s0123), max_reduce_4((x).s4567))

Definition at line 584 of file helpers.h.

◆ MAX_REDUCE_STR

#define MAX_REDUCE_STR	(	x,
		size
	)	max_reduce_##size(x)

Definition at line 587 of file helpers.h.

◆ NO_STORE

#define NO_STORE	(	data,
		offs,
		ptr
	)

Value:

{ \

}

Definition at line 284 of file helpers.h.

◆ PIXEL_UNIT16

#define PIXEL_UNIT16 4

Definition at line 207 of file helpers.h.

◆ PIXEL_UNIT4

#define PIXEL_UNIT4 1

Definition at line 205 of file helpers.h.

◆ PIXEL_UNIT8

#define PIXEL_UNIT8 2

Definition at line 206 of file helpers.h.

◆ PROD_REDUCE

#define PROD_REDUCE	(	x,
		size
	)	PROD_REDUCE_STR(x, size)

Definition at line 578 of file helpers.h.

◆ prod_reduce_1

#define prod_reduce_1 ( x ) (x)

Definition at line 570 of file helpers.h.

◆ prod_reduce_16

#define prod_reduce_16 ( x ) prod_reduce_8((x).s01234567) * prod_reduce_8((x).s89ABCDEF)

Definition at line 575 of file helpers.h.

◆ prod_reduce_2

#define prod_reduce_2 ( x ) ((x).s0) * ((x).s1)

Definition at line 571 of file helpers.h.

◆ prod_reduce_3

#define prod_reduce_3 ( x ) prod_reduce_2((x).s01) * ((x).s2)

Definition at line 572 of file helpers.h.

◆ prod_reduce_4

#define prod_reduce_4 ( x ) prod_reduce_2((x).s01) * prod_reduce_2((x).s23)

Definition at line 573 of file helpers.h.

◆ prod_reduce_8

#define prod_reduce_8 ( x ) prod_reduce_4((x).s0123) * prod_reduce_4((x).s4567)

Definition at line 574 of file helpers.h.

◆ PROD_REDUCE_STR

#define PROD_REDUCE_STR	(	x,
		size
	)	prod_reduce_##size(x)

Definition at line 577 of file helpers.h.

◆ READ_IMAGE2D

#define READ_IMAGE2D	(	data_type,
		n0,
		img,
		x_coord,
		y_coord
	)	READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)

Definition at line 246 of file helpers.h.

◆ read_image2d_floatx1

#define read_image2d_floatx1	(	img,
		x_coord,
		y_coord
	)	(float4)(read_imagef(img, (int2)(x_coord, y_coord)));

Definition at line 222 of file helpers.h.

◆ read_image2d_floatx2

#define read_image2d_floatx2	(	img,
		x_coord,
		y_coord
	)	(float8)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)));

Definition at line 223 of file helpers.h.

◆ read_image2d_floatx4

#define read_image2d_floatx4	(	img,
		x_coord,
		y_coord
	)	(float16)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)), read_imagef(img, (int2)(x_coord + 2, y_coord)), read_imagef(img, (int2)(x_coord + 3, y_coord)));

Definition at line 224 of file helpers.h.

◆ READ_IMAGE2D_STR

#define READ_IMAGE2D_STR	(	data_type,
		n0,
		img,
		x_coord,
		y_coord
	)	read_image2d_##data_type##x##n0(img, x_coord, y_coord)

Utility macro to read a 2D OpenCL image object.

Note: Coordinates are not normalized

Parameters

[in]	data_type	Data type
[in]	n0	Number of pixels to read. Only 1, 2 and 4 are supported
[in]	img	OpenCL image object
[in]	x_coord	The x coordinate for the top-left pixel
[in]	y_coord	The y coordinate for the top-left pixel

Returns: Pixels from the 2D OpenCL image object

Definition at line 245 of file helpers.h.

◆ REV1

#define REV1 ( x ) ((x))

Definition at line 83 of file helpers.h.

◆ REV16

#define REV16 ( x ) ((x).sFEDCBA9876543210)

Definition at line 88 of file helpers.h.

◆ REV2

#define REV2 ( x ) ((x).s10)

Definition at line 84 of file helpers.h.

◆ REV3

#define REV3 ( x ) ((x).s210)

Definition at line 85 of file helpers.h.

◆ REV4

#define REV4 ( x ) ((x).s3210)

Definition at line 86 of file helpers.h.

◆ REV8

#define REV8 ( x ) ((x).s76543210)

Definition at line 87 of file helpers.h.

◆ REVERSE

#define REVERSE	(	x,
		s
	)	REVERSE_STR(x, s)

Definition at line 101 of file helpers.h.

◆ REVERSE_STR

#define REVERSE_STR	(	x,
		s
	)	REV##s((x))

Definition at line 100 of file helpers.h.

◆ ROT16_0

#define ROT16_0 ( x ) ((x))

Definition at line 140 of file helpers.h.

◆ ROT16_1

#define ROT16_1 ( x ) ((x).sF0123456789ABCDE)

Definition at line 141 of file helpers.h.

◆ ROT16_10

#define ROT16_10 ( x ) ((x).s6789ABCDEF012345)

Definition at line 150 of file helpers.h.

◆ ROT16_11

#define ROT16_11 ( x ) ((x).s56789ABCDEF01234)

Definition at line 151 of file helpers.h.

◆ ROT16_12

#define ROT16_12 ( x ) ((x).s456789ABCDEF0123)

Definition at line 152 of file helpers.h.

◆ ROT16_13

#define ROT16_13 ( x ) ((x).s3456789ABCDEF012)

Definition at line 153 of file helpers.h.

◆ ROT16_14

#define ROT16_14 ( x ) ((x).s23456789ABCDEF01)

Definition at line 154 of file helpers.h.

◆ ROT16_15

#define ROT16_15 ( x ) ((x).s123456789ABCDEF0)

Definition at line 155 of file helpers.h.

◆ ROT16_16

#define ROT16_16 ( x ) ((x))

Definition at line 156 of file helpers.h.

◆ ROT16_2

#define ROT16_2 ( x ) ((x).sEF0123456789ABCD)

Definition at line 142 of file helpers.h.

◆ ROT16_3

#define ROT16_3 ( x ) ((x).sDEF0123456789ABC)

Definition at line 143 of file helpers.h.

◆ ROT16_4

#define ROT16_4 ( x ) ((x).sCDEF0123456789AB)

Definition at line 144 of file helpers.h.

◆ ROT16_5

#define ROT16_5 ( x ) ((x).sBCDEF0123456789A)

Definition at line 145 of file helpers.h.

◆ ROT16_6

#define ROT16_6 ( x ) ((x).sABCDEF0123456789)

Definition at line 146 of file helpers.h.

◆ ROT16_7

#define ROT16_7 ( x ) ((x).s9ABCDEF012345678)

Definition at line 147 of file helpers.h.

◆ ROT16_8

#define ROT16_8 ( x ) ((x).s89ABCDEF01234567)

Definition at line 148 of file helpers.h.

◆ ROT16_9

#define ROT16_9 ( x ) ((x).s789ABCDEF0123456)

Definition at line 149 of file helpers.h.

◆ ROT1_0

#define ROT1_0 ( x ) ((x))

Definition at line 112 of file helpers.h.

◆ ROT1_1

#define ROT1_1 ( x ) ((x))

Definition at line 113 of file helpers.h.

◆ ROT2_0

#define ROT2_0 ( x ) ((x))

Definition at line 115 of file helpers.h.

◆ ROT2_1

#define ROT2_1 ( x ) ((x).s10)

Definition at line 116 of file helpers.h.

◆ ROT2_2

#define ROT2_2 ( x ) ((x))

Definition at line 117 of file helpers.h.

◆ ROT3_0

#define ROT3_0 ( x ) ((x))

Definition at line 119 of file helpers.h.

◆ ROT3_1

#define ROT3_1 ( x ) ((x).s201)

Definition at line 120 of file helpers.h.

◆ ROT3_2

#define ROT3_2 ( x ) ((x).s120)

Definition at line 121 of file helpers.h.

◆ ROT3_3

#define ROT3_3 ( x ) ((x))

Definition at line 122 of file helpers.h.

◆ ROT4_0

#define ROT4_0 ( x ) ((x))

Definition at line 124 of file helpers.h.

◆ ROT4_1

#define ROT4_1 ( x ) ((x).s3012)

Definition at line 125 of file helpers.h.

◆ ROT4_2

#define ROT4_2 ( x ) ((x).s2301)

Definition at line 126 of file helpers.h.

◆ ROT4_3

#define ROT4_3 ( x ) ((x).s1230)

Definition at line 127 of file helpers.h.

◆ ROT4_4

#define ROT4_4 ( x ) ((x))

Definition at line 128 of file helpers.h.

◆ ROT8_0

#define ROT8_0 ( x ) ((x))

Definition at line 130 of file helpers.h.

◆ ROT8_1

#define ROT8_1 ( x ) ((x).s70123456)

Definition at line 131 of file helpers.h.

◆ ROT8_2

#define ROT8_2 ( x ) ((x).s67012345)

Definition at line 132 of file helpers.h.

◆ ROT8_3

#define ROT8_3 ( x ) ((x).s56701234)

Definition at line 133 of file helpers.h.

◆ ROT8_4

#define ROT8_4 ( x ) ((x).s45670123)

Definition at line 134 of file helpers.h.

◆ ROT8_5

#define ROT8_5 ( x ) ((x).s34567012)

Definition at line 135 of file helpers.h.

◆ ROT8_6

#define ROT8_6 ( x ) ((x).s23456701)

Definition at line 136 of file helpers.h.

◆ ROT8_7

#define ROT8_7 ( x ) ((x).s12345670)

Definition at line 137 of file helpers.h.

◆ ROT8_8

#define ROT8_8 ( x ) ((x))

Definition at line 138 of file helpers.h.

◆ ROTATE

#define ROTATE	(	x,
		s,
		n
	)	ROTATE_STR(x, s, n)

Definition at line 170 of file helpers.h.

◆ ROTATE_STR

#define ROTATE_STR	(	x,
		s,
		n
	)	ROT##s##_##n(x)

Definition at line 169 of file helpers.h.

◆ SELECT_DATA_TYPE

#define SELECT_DATA_TYPE ( type ) SELECT_VEC_DATA_TYPE_STR(type, 1)

Definition at line 543 of file helpers.h.

◆ SELECT_VEC_DATA_TYPE

#define SELECT_VEC_DATA_TYPE	(	type,
		size
	)	SELECT_VEC_DATA_TYPE_STR(type, size)

Definition at line 542 of file helpers.h.

◆ SELECT_VEC_DATA_TYPE_STR

#define SELECT_VEC_DATA_TYPE_STR	(	type,
		size
	)	select_vec_dt_##type(size)

Definition at line 541 of file helpers.h.

◆ select_vec_dt_char

#define select_vec_dt_char ( size ) char##size

Definition at line 531 of file helpers.h.

◆ select_vec_dt_float

#define select_vec_dt_float ( size ) int##size

Definition at line 537 of file helpers.h.

◆ select_vec_dt_half

#define select_vec_dt_half ( size ) short##size

Definition at line 534 of file helpers.h.

◆ select_vec_dt_int

#define select_vec_dt_int ( size ) int##size

Definition at line 536 of file helpers.h.

◆ select_vec_dt_long

#define select_vec_dt_long ( size ) long##size

Definition at line 539 of file helpers.h.

◆ select_vec_dt_short

#define select_vec_dt_short ( size ) short##size

Definition at line 533 of file helpers.h.

◆ select_vec_dt_uchar

#define select_vec_dt_uchar ( size ) uchar##size

Definition at line 530 of file helpers.h.

◆ select_vec_dt_uint

#define select_vec_dt_uint ( size ) uint##size

Definition at line 535 of file helpers.h.

◆ select_vec_dt_ulong

#define select_vec_dt_ulong ( size ) ulong##size

Definition at line 538 of file helpers.h.

◆ select_vec_dt_ushort

#define select_vec_dt_ushort ( size ) ushort##size

Definition at line 532 of file helpers.h.

◆ short1

#define short1 short

Definition at line 255 of file helpers.h.

◆ SIGNED_INT_DATA_TYPE

#define SIGNED_INT_DATA_TYPE ( type ) SIGNED_INT_VEC_DATA_TYPE_STR(type, 1)

Definition at line 558 of file helpers.h.

◆ SIGNED_INT_VEC_DATA_TYPE

#define SIGNED_INT_VEC_DATA_TYPE	(	type,
		size
	)	SIGNED_INT_VEC_DATA_TYPE_STR(type, size)

Definition at line 557 of file helpers.h.

◆ SIGNED_INT_VEC_DATA_TYPE_STR

#define SIGNED_INT_VEC_DATA_TYPE_STR	(	type,
		size
	)	signed_int_vec_dt_##type(size)

Definition at line 556 of file helpers.h.

◆ signed_int_vec_dt_char

#define signed_int_vec_dt_char ( size ) char##size

Definition at line 546 of file helpers.h.

◆ signed_int_vec_dt_float

#define signed_int_vec_dt_float ( size ) int##size

Definition at line 552 of file helpers.h.

◆ signed_int_vec_dt_half

#define signed_int_vec_dt_half ( size ) short##size

Definition at line 549 of file helpers.h.

◆ signed_int_vec_dt_int

#define signed_int_vec_dt_int ( size ) int##size

Definition at line 551 of file helpers.h.

◆ signed_int_vec_dt_long

#define signed_int_vec_dt_long ( size ) long##size

Definition at line 554 of file helpers.h.

◆ signed_int_vec_dt_short

#define signed_int_vec_dt_short ( size ) short##size

Definition at line 548 of file helpers.h.

◆ signed_int_vec_dt_uchar

#define signed_int_vec_dt_uchar ( size ) char##size

Definition at line 545 of file helpers.h.

◆ signed_int_vec_dt_uint

#define signed_int_vec_dt_uint ( size ) int##size

Definition at line 550 of file helpers.h.

◆ signed_int_vec_dt_ulong

#define signed_int_vec_dt_ulong ( size ) long##size

Definition at line 553 of file helpers.h.

◆ signed_int_vec_dt_ushort

#define signed_int_vec_dt_ushort ( size ) short##size

Definition at line 547 of file helpers.h.

◆ SUM_REDUCE

#define SUM_REDUCE	(	x,
		size
	)	SUM_REDUCE_STR(x, size)

Definition at line 568 of file helpers.h.

◆ sum_reduce_1

#define sum_reduce_1 ( x ) (x)

Definition at line 560 of file helpers.h.

◆ sum_reduce_16

#define sum_reduce_16 ( x ) sum_reduce_8((x).s01234567) + sum_reduce_8((x).s89ABCDEF)

Definition at line 565 of file helpers.h.

◆ sum_reduce_2

#define sum_reduce_2 ( x ) ((x).s0) + ((x).s1)

Definition at line 561 of file helpers.h.

◆ sum_reduce_3

#define sum_reduce_3 ( x ) sum_reduce_2((x).s01) + ((x).s2)

Definition at line 562 of file helpers.h.

◆ sum_reduce_4

#define sum_reduce_4 ( x ) sum_reduce_2((x).s01) + sum_reduce_2((x).s23)

Definition at line 563 of file helpers.h.

◆ sum_reduce_8

#define sum_reduce_8 ( x ) sum_reduce_4((x).s0123) + sum_reduce_4((x).s4567)

Definition at line 564 of file helpers.h.

◆ SUM_REDUCE_STR

#define SUM_REDUCE_STR	(	x,
		size
	)	sum_reduce_##size(x)

Definition at line 567 of file helpers.h.

◆ TENSOR3D_DECLARATION

#define TENSOR3D_DECLARATION ( name )

Value:

__global uchar *name##_ptr,      \
    uint        name##_stride_x, \
    uint        name##_step_x,   \
    uint        name##_stride_y, \
    uint        name##_step_y,   \
    uint        name##_stride_z, \
    uint        name##_step_z,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 604 of file helpers.h.

Referenced by calculate_avg_scale(), deconvolution_upsample(), and pooling_layer_3().

◆ TENSOR4D_DECLARATION

#define TENSOR4D_DECLARATION ( name )

Value:

__global uchar *name##_ptr,      \
    uint        name##_stride_x, \
    uint        name##_step_x,   \
    uint        name##_stride_y, \
    uint        name##_step_y,   \
    uint        name##_stride_z, \
    uint        name##_step_z,   \
    uint        name##_stride_w, \
    uint        name##_step_w,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 614 of file helpers.h.

◆ uchar1

#define uchar1 uchar

Definition at line 254 of file helpers.h.

◆ uint1

#define uint1 uint

Definition at line 258 of file helpers.h.

◆ ulong1

#define ulong1 ulong

Definition at line 260 of file helpers.h.

◆ ushort1

#define ushort1 ushort

Definition at line 256 of file helpers.h.

◆ V_OFFS1

#define V_OFFS1 ( dt ) (dt##1)(0)

Definition at line 181 of file helpers.h.

◆ V_OFFS16

#define V_OFFS16 ( dt ) (dt##16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

Definition at line 186 of file helpers.h.

◆ V_OFFS2

#define V_OFFS2 ( dt ) (dt##2)(0, 1)

Definition at line 182 of file helpers.h.

◆ V_OFFS3

#define V_OFFS3 ( dt ) (dt##3)(0, 1, 2)

Definition at line 183 of file helpers.h.

◆ V_OFFS4

#define V_OFFS4 ( dt ) (dt##4)(0, 1, 2, 3)

Definition at line 184 of file helpers.h.

◆ V_OFFS8

#define V_OFFS8 ( dt ) (dt##8)(0, 1, 2, 3, 4, 5, 6, 7)

Definition at line 185 of file helpers.h.

◆ VEC_DATA_TYPE

#define VEC_DATA_TYPE	(	type,
		size
	)	VEC_DATA_TYPE_STR(type, size)

◆ VEC_DATA_TYPE_STR

#define VEC_DATA_TYPE_STR	(	type,
		size
	)	type##size

Definition at line 518 of file helpers.h.

◆ VEC_OFFS

#define VEC_OFFS	(	dt,
		s
	)	VEC_OFFS_STR(dt, s)

Definition at line 199 of file helpers.h.

◆ VEC_OFFS_STR

#define VEC_OFFS_STR	(	dt,
		s
	)	V_OFFS##s(dt)

Definition at line 198 of file helpers.h.

◆ VECTOR_DECLARATION

#define VECTOR_DECLARATION ( name )

Value:

__global uchar *name##_ptr,      \
    uint        name##_stride_x, \
    uint        name##_step_x,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 590 of file helpers.h.

Referenced by deconvolution_upsample() and get_invsqrt_quantized_multiplier_exp().

◆ VLOAD

#define VLOAD ( size ) VLOAD_STR(size)

Definition at line 203 of file helpers.h.

Referenced by cast_down(), cast_up(), get_invsqrt_quantized_multiplier_exp(), pooling_layer_2_nchw_indices_fp16(), pooling_layer_2_nchw_indices_fp32(), roi_pool_1x1(), and strided_slice().

◆ vload1

#define vload1	(	OFFSET,
		PTR
	)	*(OFFSET + PTR)

Definition at line 263 of file helpers.h.

◆ VLOAD_STR

#define VLOAD_STR ( size ) vload##size

Definition at line 202 of file helpers.h.

◆ VSTORE

#define VSTORE ( size ) VSTORE_STR(size)

Definition at line 249 of file helpers.h.

Referenced by get_invsqrt_quantized_multiplier_exp() and strided_slice().

◆ vstore1

#define vstore1	(	DATA,
		OFFSET,
		PTR
	)	*(OFFSET + PTR) = DATA

Definition at line 264 of file helpers.h.

◆ VSTORE_PARTIAL

#define VSTORE_PARTIAL	(	size,
		store_size
	)	VSTORE_PARTIAL_STR(size, store_size)

Definition at line 282 of file helpers.h.

◆ vstore_partial_1

#define vstore_partial_1	(	DATA,
		OFFSET,
		PTR
	)	vstore1(DATA.s0, OFFSET, PTR);

Definition at line 414 of file helpers.h.

◆ vstore_partial_10

#define vstore_partial_10	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_2(DATA.s89, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 445 of file helpers.h.

◆ vstore_partial_11

#define vstore_partial_11	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_3(DATA.s89a, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 449 of file helpers.h.

◆ vstore_partial_12

#define vstore_partial_12	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_4(DATA.s89ab, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 453 of file helpers.h.

◆ vstore_partial_13

#define vstore_partial_13	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_5(DATA.s89abcdef, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 457 of file helpers.h.

◆ vstore_partial_14

#define vstore_partial_14	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_6(DATA.s89abcdef, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 461 of file helpers.h.

◆ vstore_partial_15

#define vstore_partial_15	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore_partial_7(DATA.s89abcdef, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 465 of file helpers.h.

◆ vstore_partial_16

#define vstore_partial_16	(	DATA,
		OFFSET,
		PTR
	)	vstore16(DATA, OFFSET, PTR);

Definition at line 469 of file helpers.h.

◆ vstore_partial_16_0

#define vstore_partial_16_0 NO_STORE

Definition at line 379 of file helpers.h.

◆ vstore_partial_16_1

#define vstore_partial_16_1 vstore_partial_1

Definition at line 380 of file helpers.h.

◆ vstore_partial_16_10

#define vstore_partial_16_10 vstore_partial_10

Definition at line 389 of file helpers.h.

◆ vstore_partial_16_11

#define vstore_partial_16_11 vstore_partial_11

Definition at line 390 of file helpers.h.

◆ vstore_partial_16_12

#define vstore_partial_16_12 vstore_partial_12

Definition at line 391 of file helpers.h.

◆ vstore_partial_16_13

#define vstore_partial_16_13 vstore_partial_13

Definition at line 392 of file helpers.h.

◆ vstore_partial_16_14

#define vstore_partial_16_14 vstore_partial_14

Definition at line 393 of file helpers.h.

◆ vstore_partial_16_15

#define vstore_partial_16_15 vstore_partial_15

Definition at line 394 of file helpers.h.

◆ vstore_partial_16_16

#define vstore_partial_16_16 vstore_partial_16

Definition at line 395 of file helpers.h.

◆ vstore_partial_16_2

#define vstore_partial_16_2 vstore_partial_2

Definition at line 381 of file helpers.h.

◆ vstore_partial_16_3

#define vstore_partial_16_3 vstore_partial_3

Definition at line 382 of file helpers.h.

◆ vstore_partial_16_4

#define vstore_partial_16_4 vstore_partial_4

Definition at line 383 of file helpers.h.

◆ vstore_partial_16_5

#define vstore_partial_16_5 vstore_partial_5

Definition at line 384 of file helpers.h.

◆ vstore_partial_16_6

#define vstore_partial_16_6 vstore_partial_6

Definition at line 385 of file helpers.h.

◆ vstore_partial_16_7

#define vstore_partial_16_7 vstore_partial_7

Definition at line 386 of file helpers.h.

◆ vstore_partial_16_8

#define vstore_partial_16_8 vstore_partial_8

Definition at line 387 of file helpers.h.

◆ vstore_partial_16_9

#define vstore_partial_16_9 vstore_partial_9

Definition at line 388 of file helpers.h.

◆ vstore_partial_1_0

#define vstore_partial_1_0 NO_STORE

Definition at line 289 of file helpers.h.

◆ vstore_partial_1_1

#define vstore_partial_1_1 vstore1

Definition at line 290 of file helpers.h.

◆ vstore_partial_1_10

#define vstore_partial_1_10 NO_STORE

Definition at line 299 of file helpers.h.

◆ vstore_partial_1_11

#define vstore_partial_1_11 NO_STORE

Definition at line 300 of file helpers.h.

◆ vstore_partial_1_12

#define vstore_partial_1_12 NO_STORE

Definition at line 301 of file helpers.h.

◆ vstore_partial_1_13

#define vstore_partial_1_13 NO_STORE

Definition at line 302 of file helpers.h.

◆ vstore_partial_1_14

#define vstore_partial_1_14 NO_STORE

Definition at line 303 of file helpers.h.

◆ vstore_partial_1_15

#define vstore_partial_1_15 NO_STORE

Definition at line 304 of file helpers.h.

◆ vstore_partial_1_16

#define vstore_partial_1_16 NO_STORE

Definition at line 305 of file helpers.h.

◆ vstore_partial_1_2

#define vstore_partial_1_2 NO_STORE

Definition at line 291 of file helpers.h.

◆ vstore_partial_1_3

#define vstore_partial_1_3 NO_STORE

Definition at line 292 of file helpers.h.

◆ vstore_partial_1_4

#define vstore_partial_1_4 NO_STORE

Definition at line 293 of file helpers.h.

◆ vstore_partial_1_5

#define vstore_partial_1_5 NO_STORE

Definition at line 294 of file helpers.h.

◆ vstore_partial_1_6

#define vstore_partial_1_6 NO_STORE

Definition at line 295 of file helpers.h.

◆ vstore_partial_1_7

#define vstore_partial_1_7 NO_STORE

Definition at line 296 of file helpers.h.

◆ vstore_partial_1_8

#define vstore_partial_1_8 NO_STORE

Definition at line 297 of file helpers.h.

◆ vstore_partial_1_9

#define vstore_partial_1_9 NO_STORE

Definition at line 298 of file helpers.h.

◆ vstore_partial_2

#define vstore_partial_2	(	DATA,
		OFFSET,
		PTR
	)	vstore2(DATA.s01, OFFSET, PTR);

Definition at line 417 of file helpers.h.

◆ vstore_partial_2_0

#define vstore_partial_2_0 NO_STORE

Definition at line 307 of file helpers.h.

◆ vstore_partial_2_1

#define vstore_partial_2_1 vstore_partial_1

Definition at line 308 of file helpers.h.

◆ vstore_partial_2_10

#define vstore_partial_2_10 NO_STORE

Definition at line 317 of file helpers.h.

◆ vstore_partial_2_11

#define vstore_partial_2_11 NO_STORE

Definition at line 318 of file helpers.h.

◆ vstore_partial_2_12

#define vstore_partial_2_12 NO_STORE

Definition at line 319 of file helpers.h.

◆ vstore_partial_2_13

#define vstore_partial_2_13 NO_STORE

Definition at line 320 of file helpers.h.

◆ vstore_partial_2_14

#define vstore_partial_2_14 NO_STORE

Definition at line 321 of file helpers.h.

◆ vstore_partial_2_15

#define vstore_partial_2_15 NO_STORE

Definition at line 322 of file helpers.h.

◆ vstore_partial_2_16

#define vstore_partial_2_16 NO_STORE

Definition at line 323 of file helpers.h.

◆ vstore_partial_2_2

#define vstore_partial_2_2 vstore_partial_2

Definition at line 309 of file helpers.h.

◆ vstore_partial_2_3

#define vstore_partial_2_3 NO_STORE

Definition at line 310 of file helpers.h.

◆ vstore_partial_2_4

#define vstore_partial_2_4 NO_STORE

Definition at line 311 of file helpers.h.

◆ vstore_partial_2_5

#define vstore_partial_2_5 NO_STORE

Definition at line 312 of file helpers.h.

◆ vstore_partial_2_6

#define vstore_partial_2_6 NO_STORE

Definition at line 313 of file helpers.h.

◆ vstore_partial_2_7

#define vstore_partial_2_7 NO_STORE

Definition at line 314 of file helpers.h.

◆ vstore_partial_2_8

#define vstore_partial_2_8 NO_STORE

Definition at line 315 of file helpers.h.

◆ vstore_partial_2_9

#define vstore_partial_2_9 NO_STORE

Definition at line 316 of file helpers.h.

◆ vstore_partial_3

#define vstore_partial_3	(	DATA,
		OFFSET,
		PTR
	)	vstore3(DATA.s012, OFFSET, PTR);

Definition at line 420 of file helpers.h.

◆ vstore_partial_3_0

#define vstore_partial_3_0 NO_STORE

Definition at line 325 of file helpers.h.

◆ vstore_partial_3_1

#define vstore_partial_3_1 vstore_partial_1

Definition at line 326 of file helpers.h.

◆ vstore_partial_3_10

#define vstore_partial_3_10 NO_STORE

Definition at line 335 of file helpers.h.

◆ vstore_partial_3_11

#define vstore_partial_3_11 NO_STORE

Definition at line 336 of file helpers.h.

◆ vstore_partial_3_12

#define vstore_partial_3_12 NO_STORE

Definition at line 337 of file helpers.h.

◆ vstore_partial_3_13

#define vstore_partial_3_13 NO_STORE

Definition at line 338 of file helpers.h.

◆ vstore_partial_3_14

#define vstore_partial_3_14 NO_STORE

Definition at line 339 of file helpers.h.

◆ vstore_partial_3_15

#define vstore_partial_3_15 NO_STORE

Definition at line 340 of file helpers.h.

◆ vstore_partial_3_16

#define vstore_partial_3_16 NO_STORE

Definition at line 341 of file helpers.h.

◆ vstore_partial_3_2

#define vstore_partial_3_2 vstore_partial_2

Definition at line 327 of file helpers.h.

◆ vstore_partial_3_3

#define vstore_partial_3_3 vstore_partial_3

Definition at line 328 of file helpers.h.

◆ vstore_partial_3_4

#define vstore_partial_3_4 NO_STORE

Definition at line 329 of file helpers.h.

◆ vstore_partial_3_5

#define vstore_partial_3_5 NO_STORE

Definition at line 330 of file helpers.h.

◆ vstore_partial_3_6

#define vstore_partial_3_6 NO_STORE

Definition at line 331 of file helpers.h.

◆ vstore_partial_3_7

#define vstore_partial_3_7 NO_STORE

Definition at line 332 of file helpers.h.

◆ vstore_partial_3_8

#define vstore_partial_3_8 NO_STORE

Definition at line 333 of file helpers.h.

◆ vstore_partial_3_9

#define vstore_partial_3_9 NO_STORE

Definition at line 334 of file helpers.h.

◆ vstore_partial_4

#define vstore_partial_4	(	DATA,
		OFFSET,
		PTR
	)	vstore4(DATA.s0123, OFFSET, PTR);

Definition at line 423 of file helpers.h.

◆ vstore_partial_4_0

#define vstore_partial_4_0 NO_STORE

Definition at line 343 of file helpers.h.

◆ vstore_partial_4_1

#define vstore_partial_4_1 vstore_partial_1

Definition at line 344 of file helpers.h.

◆ vstore_partial_4_10

#define vstore_partial_4_10 NO_STORE

Definition at line 353 of file helpers.h.

◆ vstore_partial_4_11

#define vstore_partial_4_11 NO_STORE

Definition at line 354 of file helpers.h.

◆ vstore_partial_4_12

#define vstore_partial_4_12 NO_STORE

Definition at line 355 of file helpers.h.

◆ vstore_partial_4_13

#define vstore_partial_4_13 NO_STORE

Definition at line 356 of file helpers.h.

◆ vstore_partial_4_14

#define vstore_partial_4_14 NO_STORE

Definition at line 357 of file helpers.h.

◆ vstore_partial_4_15

#define vstore_partial_4_15 NO_STORE

Definition at line 358 of file helpers.h.

◆ vstore_partial_4_16

#define vstore_partial_4_16 NO_STORE

Definition at line 359 of file helpers.h.

◆ vstore_partial_4_2

#define vstore_partial_4_2 vstore_partial_2

Definition at line 345 of file helpers.h.

◆ vstore_partial_4_3

#define vstore_partial_4_3 vstore_partial_3

Definition at line 346 of file helpers.h.

◆ vstore_partial_4_4

#define vstore_partial_4_4 vstore_partial_4

Definition at line 347 of file helpers.h.

◆ vstore_partial_4_5

#define vstore_partial_4_5 NO_STORE

Definition at line 348 of file helpers.h.

◆ vstore_partial_4_6

#define vstore_partial_4_6 NO_STORE

Definition at line 349 of file helpers.h.

◆ vstore_partial_4_7

#define vstore_partial_4_7 NO_STORE

Definition at line 350 of file helpers.h.

◆ vstore_partial_4_8

#define vstore_partial_4_8 NO_STORE

Definition at line 351 of file helpers.h.

◆ vstore_partial_4_9

#define vstore_partial_4_9 NO_STORE

Definition at line 352 of file helpers.h.

◆ vstore_partial_5

#define vstore_partial_5	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_4(DATA.s0123, OFFSET, PTR); \

vstore1(DATA.s4, OFFSET, PTR + 4);

vstore_partial_4

#define vstore_partial_4(DATA, OFFSET, PTR)

Definition: helpers.h:423

Definition at line 426 of file helpers.h.

◆ vstore_partial_6

#define vstore_partial_6	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_4(DATA.s0123, OFFSET, PTR); \

vstore_partial_2(DATA.s45, OFFSET, PTR + 4);

vstore_partial_4

#define vstore_partial_4(DATA, OFFSET, PTR)

Definition: helpers.h:423

Definition at line 430 of file helpers.h.

◆ vstore_partial_7

#define vstore_partial_7	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_4(DATA.s0123, OFFSET, PTR); \

vstore_partial_3(DATA.s456, OFFSET, PTR + 4);

vstore_partial_4

#define vstore_partial_4(DATA, OFFSET, PTR)

Definition: helpers.h:423

Definition at line 434 of file helpers.h.

◆ vstore_partial_8

#define vstore_partial_8	(	DATA,
		OFFSET,
		PTR
	)	vstore8(DATA.s01234567, OFFSET, PTR);

Definition at line 438 of file helpers.h.

◆ vstore_partial_8_0

#define vstore_partial_8_0 NO_STORE

Definition at line 361 of file helpers.h.

◆ vstore_partial_8_1

#define vstore_partial_8_1 vstore_partial_1

Definition at line 362 of file helpers.h.

◆ vstore_partial_8_10

#define vstore_partial_8_10 NO_STORE

Definition at line 371 of file helpers.h.

◆ vstore_partial_8_11

#define vstore_partial_8_11 NO_STORE

Definition at line 372 of file helpers.h.

◆ vstore_partial_8_12

#define vstore_partial_8_12 NO_STORE

Definition at line 373 of file helpers.h.

◆ vstore_partial_8_13

#define vstore_partial_8_13 NO_STORE

Definition at line 374 of file helpers.h.

◆ vstore_partial_8_14

#define vstore_partial_8_14 NO_STORE

Definition at line 375 of file helpers.h.

◆ vstore_partial_8_15

#define vstore_partial_8_15 NO_STORE

Definition at line 376 of file helpers.h.

◆ vstore_partial_8_16

#define vstore_partial_8_16 NO_STORE

Definition at line 377 of file helpers.h.

◆ vstore_partial_8_2

#define vstore_partial_8_2 vstore_partial_2

Definition at line 363 of file helpers.h.

◆ vstore_partial_8_3

#define vstore_partial_8_3 vstore_partial_3

Definition at line 364 of file helpers.h.

◆ vstore_partial_8_4

#define vstore_partial_8_4 vstore_partial_4

Definition at line 365 of file helpers.h.

◆ vstore_partial_8_5

#define vstore_partial_8_5 vstore_partial_5

Definition at line 366 of file helpers.h.

◆ vstore_partial_8_6

#define vstore_partial_8_6 vstore_partial_6

Definition at line 367 of file helpers.h.

◆ vstore_partial_8_7

#define vstore_partial_8_7 vstore_partial_7

Definition at line 368 of file helpers.h.

◆ vstore_partial_8_8

#define vstore_partial_8_8 vstore_partial_8

Definition at line 369 of file helpers.h.

◆ vstore_partial_8_9

#define vstore_partial_8_9 NO_STORE

Definition at line 370 of file helpers.h.

◆ vstore_partial_9

#define vstore_partial_9	(	DATA,
		OFFSET,
		PTR
	)

Value:

vstore_partial_8(DATA.s01234567, OFFSET, PTR); \

vstore1(DATA.s8, OFFSET, PTR + 8);

vstore_partial_8

#define vstore_partial_8(DATA, OFFSET, PTR)

Definition: helpers.h:438

Definition at line 441 of file helpers.h.

◆ VSTORE_PARTIAL_STR

#define VSTORE_PARTIAL_STR	(	size,
		store_size
	)	vstore_partial_##size##_##store_size

Definition at line 281 of file helpers.h.

◆ VSTORE_STR

#define VSTORE_STR ( size ) vstore##size

Definition at line 248 of file helpers.h.

Typedef Documentation

◆ Image

typedef struct Image Image

Structure to hold Image information.

◆ Tensor3D

typedef struct Tensor3D Tensor3D

Structure to hold 3D tensor information.

◆ Tensor4D

typedef struct Tensor4D Tensor4D

Structure to hold 4D tensor information.

◆ Vector

typedef struct Vector Vector

Structure to hold Vector information.

Function Documentation

◆ offset()

__global uchar* offset	(	const Image *	img,
		int	x,
		int	y
	)

inline

Get the pointer position of an Image.

Parameters

[in]	img	Pointer to the starting position of the buffer
[in]	x	Relative X position
[in]	y	Relative Y position

Definition at line 861 of file helpers.h.

References Image::ptr, Image::stride_x, and Image::stride_y.

 {
     return img->ptr + x * img->stride_x + y * img->stride_y;
 }

◆ tensor3D_index2ptr()

__global const uchar* tensor3D_index2ptr	(	const Tensor3D *	tensor,
		uint	width,
		uint	height,
		uint	depth,
		uint	index
	)

inline

Get the pointer to the element at a given linear index of a Tensor3D.

Parameters

[in]	tensor	Pointer to the starting position of the buffer
[in]	width	Width of the input tensor
[in]	height	Height of the input tensor
[in]	depth	Depth of the input tensor
[in]	index	Linear index

Definition at line 899 of file helpers.h.

References Tensor3D::offset_first_element_in_bytes, Tensor3D::ptr, Tensor3D::stride_x, Tensor3D::stride_y, and Tensor3D::stride_z.

Referenced by max_unpooling_layer_2().

 {
     uint num_elements = width * height;
 
     const uint z = index / num_elements;
 
     index %= num_elements;
 
     const uint y = index / width;
 
     index %= width;
 
     const uint x = index;
 
     return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + tensor->offset_first_element_in_bytes;
 }

◆ tensor3D_offset()

__global const uchar* tensor3D_offset	(	const Tensor3D *	tensor,
		int	x,
		int	y,
		int	z
	)

inline

Get the pointer position of a Tensor3D.

Parameters

[in]	tensor	Pointer to the starting position of the buffer
[in]	x	Relative X position
[in]	y	Relative Y position
[in]	z	Relative Z position

Definition at line 873 of file helpers.h.

References Tensor3D::ptr, Tensor3D::stride_x, Tensor3D::stride_y, and Tensor3D::stride_z.

Referenced by calculate_avg_scale(), deconvolution_upsample(), pooling_layer_2(), pooling_layer_2_nchw_indices_fp16(), pooling_layer_2_nchw_indices_fp32(), pooling_layer_3(), reshape_layer(), roi_pool_1x1(), roi_pooling_layer(), upsample_layer_nchw(), and upsample_layer_nhwc().

 {
     return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
 }

◆ tensor3D_ptr_no_update()

Tensor3D tensor3D_ptr_no_update	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x,
		uint	stride_y,
		uint	step_y,
		uint	stride_z,
		uint	step_z
	)

inline

Wrap 3D tensor information into a tensor structure.

Parameters

[in]	ptr	Pointer to the starting position of the buffer
[in]	offset_first_element_in_bytes	The offset of the first element in the source image
[in]	stride_x	Stride of the image in X dimension (in bytes)
[in]	step_x	stride_x * number of elements along X processed per workitem(in bytes)
[in]	stride_y	Stride of the image in Y dimension (in bytes)
[in]	step_y	stride_y * number of elements along Y processed per workitem(in bytes)
[in]	stride_z	Stride of the image in Z dimension (in bytes)
[in]	step_z	stride_z * number of elements along Z processed per workitem(in bytes)

Returns: A 3D tensor object

Definition at line 814 of file helpers.h.

References Vector::offset_first_element_in_bytes, Vector::ptr, Tensor3D::ptr, and Vector::stride_x.

 {
     Tensor3D tensor =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
         .stride_y                      = stride_y,
         .stride_z                      = stride_z
     };
     return tensor;
 }

◆ tensor4D_offset()

__global const uchar* tensor4D_offset	(	const Tensor4D *	tensor,
		int	x,
		int	y,
		int	z,
		int	w
	)

inline

Get the pointer position of a Tensor4D.

Parameters

[in]	tensor	Pointer to the starting position of the buffer
[in]	x	Relative X position
[in]	y	Relative Y position
[in]	z	Relative Z position
[in]	w	Relative W position

Definition at line 886 of file helpers.h.

References Tensor4D::ptr, Tensor4D::stride_w, Tensor4D::stride_x, Tensor4D::stride_y, and Tensor4D::stride_z.

 {
     return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
 }

◆ update_image_from_tensor3D_workitem_ptr()

Image update_image_from_tensor3D_workitem_ptr	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x,
		uint	stride_y,
		uint	step_y,
		uint	stride_z,
		uint	step_z
	)

inline

Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.

Parameters

[in]	ptr	Pointer to the starting position of the buffer
[in]	offset_first_element_in_bytes	The offset of the first element in the source image
[in]	stride_x	Stride of the image in X dimension (in bytes)
[in]	step_x	stride_x * number of elements along X processed per workitem(in bytes)
[in]	stride_y	Stride of the image in Y dimension (in bytes)
[in]	step_y	stride_y * number of elements along Y processed per workitem(in bytes)
[in]	stride_z	Stride of the image in Z dimension (in bytes)
[in]	step_z	stride_z * number of elements along Z processed per workitem(in bytes)

Returns: An image object

Definition at line 761 of file helpers.h.

References Vector::offset_first_element_in_bytes, Image::offset_first_element_in_bytes, Vector::ptr, Image::ptr, and Vector::stride_x.

 {
     Image img =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
         .stride_y                      = stride_y
     };
     img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
     return img;
 }

◆ update_image_workitem_ptr()

Image update_image_workitem_ptr	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x,
		uint	stride_y,
		uint	step_y
	)

inline

Wrap image information into an Image structure, and make the pointer point at this workitem's data.

Parameters

[in]	ptr	Pointer to the starting position of the buffer
[in]	offset_first_element_in_bytes	The offset of the first element in the source image
[in]	stride_x	Stride of the image in X dimension (in bytes)
[in]	step_x	stride_x * number of elements along X processed per workitem(in bytes)
[in]	stride_y	Stride of the image in Y dimension (in bytes)
[in]	step_y	stride_y * number of elements along Y processed per workitem(in bytes)

Returns: An image object

Definition at line 735 of file helpers.h.

References Vector::offset_first_element_in_bytes, Image::offset_first_element_in_bytes, Vector::ptr, Image::ptr, and Vector::stride_x.

 {
     Image img =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
         .stride_y                      = stride_y
     };
     img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
     return img;
 }

◆ update_tensor3D_workitem_ptr()

Tensor3D update_tensor3D_workitem_ptr	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x,
		uint	stride_y,
		uint	step_y,
		uint	stride_z,
		uint	step_z
	)

inline

Wrap 3D tensor information into a tensor structure, and make the pointer point at this workitem's data.

Parameters

[in]	ptr	Pointer to the starting position of the buffer
[in]	offset_first_element_in_bytes	The offset of the first element in the source image
[in]	stride_x	Stride of the image in X dimension (in bytes)
[in]	step_x	stride_x * number of elements along X processed per workitem(in bytes)
[in]	stride_y	Stride of the image in Y dimension (in bytes)
[in]	step_y	stride_y * number of elements along Y processed per workitem(in bytes)
[in]	stride_z	Stride of the image in Z dimension (in bytes)
[in]	step_z	stride_z * number of elements along Z processed per workitem(in bytes)

Returns: A 3D tensor object

Definition at line 787 of file helpers.h.

References Vector::offset_first_element_in_bytes, Tensor3D::offset_first_element_in_bytes, Vector::ptr, Tensor3D::ptr, and Vector::stride_x.

 {
     Tensor3D tensor =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
         .stride_y                      = stride_y,
         .stride_z                      = stride_z
     };
     tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
     return tensor;
 }

◆ update_tensor4D_workitem_ptr()

Tensor4D update_tensor4D_workitem_ptr	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x,
		uint	stride_y,
		uint	step_y,
		uint	stride_z,
		uint	step_z,
		uint	stride_w,
		uint	step_w,
		uint	mod_size
	)

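inline

Wrap 4D tensor information into a tensor structure, and make the pointer point at this workitem's data. The third global work-item index is split by mod_size: its remainder advances along Z (step_z) and its quotient advances along W (step_w).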

Definition at line 827 of file helpers.h.

References Vector::offset_first_element_in_bytes, Tensor4D::offset_first_element_in_bytes, Vector::ptr, Tensor4D::ptr, and Vector::stride_x.

 {
     Tensor4D tensor =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
         .stride_y                      = stride_y,
         .stride_z                      = stride_z,
         .stride_w                      = stride_w
     };
 
     tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
     return tensor;
 }

◆ update_vector_workitem_ptr()

Vector update_vector_workitem_ptr	(	__global uchar *	ptr,
		uint	offset_first_element_in_bytes,
		uint	stride_x,
		uint	step_x
	)

inline

Wrap vector information into a Vector structure, and make the pointer point at this workitem's data.

Parameters

[in]	ptr	Pointer to the starting position of the buffer
[in]	offset_first_element_in_bytes	The offset of the first element in the source vector
[in]	stride_x	Stride of the vector in X dimension (in bytes)
[in]	step_x	stride_x * number of elements along X processed per workitem(in bytes)

Returns: A vector object

Definition at line 712 of file helpers.h.

References Vector::offset_first_element_in_bytes, Vector::ptr, and Vector::stride_x.

 {
     Vector vector =
     {
         .ptr                           = ptr,
         .offset_first_element_in_bytes = offset_first_element_in_bytes,
         .stride_x                      = stride_x,
     };
     vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
     return vector;
 }

◆ vector_offset()

__global const uchar* vector_offset	(	const Vector *	vec,
		int	x
	)

inline

Get the pointer position of a Vector.

Parameters

[in]	vec	Pointer to the starting position of the buffer
[in]	x	Relative X position

Definition at line 850 of file helpers.h.

References Vector::ptr, and Vector::stride_x.

Referenced by deconvolution_upsample(), and get_invsqrt_quantized_multiplier_exp().

 {
     return vec->ptr + x * vec->stride_x;
 }

Data Structures

Macros

Typedefs

Functions

Macro Definition Documentation

◆ char1

◆ CLAMP

◆ CONCAT

◆ CONVERT

◆ convert_char1

◆ convert_char1_sat

◆ convert_double1

◆ convert_double1_sat

◆ convert_float1

◆ convert_float16_sat

◆ convert_float1_sat

◆ convert_float2_sat

◆ convert_float3_sat

◆ convert_float4_sat

◆ convert_float8_sat

◆ convert_float_sat

◆ convert_half1

◆ convert_half16_sat

◆ convert_half1_sat

◆ convert_half2_sat

◆ convert_half3_sat

◆ convert_half4_sat

◆ convert_half8_sat

◆ convert_half_sat

◆ convert_int1

◆ convert_int1_sat

◆ convert_long1

◆ convert_long1_sat

◆ CONVERT_SAT

◆ CONVERT_SAT_ROUND

◆ CONVERT_SAT_ROUND_STR

◆ CONVERT_SAT_STR

◆ convert_short1

◆ convert_short1_sat

◆ CONVERT_STR

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT [1/2]

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT [2/2]

◆ CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP

◆ CONVERT_TO_IMAGE_STRUCT

◆ CONVERT_TO_IMAGE_STRUCT_NO_STEP

◆ CONVERT_TO_TENSOR3D_STRUCT

◆ CONVERT_TO_TENSOR3D_STRUCT_NO_STEP

◆ CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR

◆ CONVERT_TO_TENSOR4D_STRUCT

◆ CONVERT_TO_TENSOR4D_STRUCT_NO_STEP

◆ CONVERT_TO_VECTOR_STRUCT

◆ CONVERT_TO_VECTOR_STRUCT_NO_STEP

◆ convert_uchar1

◆ convert_uchar16_sat

◆ convert_uchar1_sat

◆ convert_uchar2_sat

◆ convert_uchar3_sat

◆ convert_uchar4_sat

◆ convert_uchar8_sat

◆ convert_uint1

◆ convert_uint1_sat

◆ convert_ulong1

◆ convert_ulong1_sat

◆ convert_ushort1

◆ convert_ushort1_sat

◆ CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT

◆ CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR

◆ double1

◆ EXPAND

◆ float1

◆ GPU_ARCH_BIFROST

◆ GPU_ARCH_MIDGARD

◆ half1

◆ IMAGE_DECLARATION

◆ int1

◆ long1

◆ MAX_REDUCE

◆ max_reduce_1

◆ max_reduce_16

◆ max_reduce_2