Compute Library
 22.11
gemm_helpers.h File Reference
#include "activation_float_helpers.h"
#include "helpers.h"

Go to the source code of this file.

Macros

#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, BS, TYPE)
 Create column vectors to contain the values at the given index for a set of given vectors. More...
 
#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, BS, TYPE)
 Create column vectors to contain the values at the given index. More...
 
#define TRANSPOSE_K0XN0(K0, N0, BASENAME, BS, TYPE)
 Create transposed vectors from the given source vectors. More...
 
#define SCALAR_ACCESS_STR(offset, n0, x)   scalar_access_##offset##_##n0(x)
 Utility macro to access a vector with the scalar positions. More...
 
#define SCALAR_ACCESS(offset, n0, x)   SCALAR_ACCESS_STR(offset, n0, x)
 
#define scalar_access_0_1(x)   ((x).s0)
 
#define scalar_access_0_2(x)   ((x).s01)
 
#define scalar_access_0_3(x)   ((x).s012)
 
#define scalar_access_0_4(x)   ((x).s0123)
 
#define scalar_access_0_8(x)   ((x).s01234567)
 
#define scalar_access_0_16(x)   ((x).s0123456789ABCDEF)
 
#define scalar_access_1_1(x)   ((x).s1)
 
#define scalar_access_1_2(x)   ((x).s12)
 
#define scalar_access_1_3(x)   ((x).s123)
 
#define scalar_access_1_4(x)   ((x).s1234)
 
#define scalar_access_1_8(x)   ((x).s12345678)
 
#define scalar_access_2_1(x)   ((x).s2)
 
#define scalar_access_2_2(x)   ((x).s23)
 
#define scalar_access_2_3(x)   ((x).s234)
 
#define scalar_access_2_4(x)   ((x).s2345)
 
#define scalar_access_2_8(x)   ((x).s23456789)
 
#define scalar_access_3_1(x)   ((x).s3)
 
#define scalar_access_3_2(x)   ((x).s34)
 
#define scalar_access_3_3(x)   ((x).s345)
 
#define scalar_access_3_4(x)   ((x).s3456)
 
#define scalar_access_3_8(x)   ((x).s3456789A)
 
#define scalar_access_4_1(x)   ((x).s4)
 
#define scalar_access_4_2(x)   ((x).s45)
 
#define scalar_access_4_3(x)   ((x).s456)
 
#define scalar_access_4_4(x)   ((x).s4567)
 
#define scalar_access_4_8(x)   ((x).s456789AB)
 
#define scalar_access_8_1(x)   ((x).s8)
 
#define scalar_access_8_2(x)   ((x).s89)
 
#define scalar_access_8_3(x)   ((x).s89A)
 
#define scalar_access_8_4(x)   ((x).s89AB)
 
#define scalar_access_8_8(x)   ((x).s89ABCDEF)
 
#define scalar_access_12_1(x)   ((x).sC)
 
#define scalar_access_12_2(x)   ((x).sCD)
 
#define scalar_access_12_3(x)   ((x).sCDE)
 
#define scalar_access_12_4(x)   ((x).sCDEF)
 
#define scalar_access_16_1(x)   ((x).sF)
 
LOAD_TENSOR_ROW_n

Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1) without allocating variables.

Parameters
[in]N0The number of columns to load
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]PTRThe base pointer
[in]COL_OFFSETThe column vector offset. COL_OFFSET + N0 must be <= 16
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_TENSOR_ROW_0(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)   ({})
 
#define LOAD_TENSOR_ROW_1(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)   SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##0) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
 
#define LOAD_TENSOR_ROW_2(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_3(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_4(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_5(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_6(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_7(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_8(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_9(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_10(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_11(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_12(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_13(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_14(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_15(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR_ROW_16(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
LOAD_TENSOR

Load tensor (consecutive rows and columns) with Z offset.

Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16 The data to load is expected to have consecutive names for each row. E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.

Parameters
[in]M0The number of consecutive rows
[in]N0The number of consecutive columns
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]PTRThe base pointer for the data
[in]COL_OFFSETThe column vector offset. COL_OFFSET + N0 must be <= 16
[in]STRIDE_YThe stride in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_TENSOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)   LOAD_TENSOR_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)   LOAD_TENSOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
 
LOAD_TENSOR_M0Xn

Load 2D tensor (consecutive rows and columns) with Z offset.

Parameters
[in]M0The number of rows to load [0-16]
[in]N0The number of columns to load [0-16]
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]PTRThe base pointer
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_TENSOR_M0X0(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   ({})
 
#define LOAD_TENSOR_M0X1(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
#define LOAD_TENSOR_M0X2(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
#define LOAD_TENSOR_M0X3(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
#define LOAD_TENSOR_M0X4(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
#define LOAD_TENSOR_M0X5(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X6(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X7(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X8(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
#define LOAD_TENSOR_M0X9(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X10(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X11(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X12(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X13(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X14(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X15(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)
 
#define LOAD_TENSOR_M0X16(M0, N0, DATA_TYPE, a, input_ptr, src_stride_y, zin)   LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
 
LOAD_TENSOR_M0XN0

Load 2D tensor (consecutive rows and columns) with Z offset.

Parameters
[in]M0The number of consecutive rows [0-16]
[in]N0The number of consecutive columns [0-16]
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]PTRThe base pointer for the data
[in]STRIDE_YThe stride in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_TENSOR_M0XN0_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)   LOAD_TENSOR_M0X##N0(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
 
#define LOAD_TENSOR_M0XN0(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)   LOAD_TENSOR_M0XN0_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
 
LOAD_ROW_n

Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).

Parameters
[in]N0The number of columns to load
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
LOAD_BLOCK

Load Blocks (consecutive rows and columns) with Z offset.

Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16 The data to load is expected to have consecutive names for each row. E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.

Parameters
[in]M0The number of consecutive rows
[in]N0The number of consecutive columns
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]PTRThe base pointer for the data
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride in y-axis direction
[in]ZThe z-axis offset vector
#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)   LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)   LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
LOAD_ROW_PARTIAL_n

Partially load the 0 to (n-1)th rows of the given variables

Within each row, load the lower LOAD_N0 elements of vectors of width N0

Note
in case LOAD_N0 != 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring small performance penalty.
Parameters
[in]N0The width of the passed in vector. Supported: 1, 2, 3, 4, 8, 16
[in]LOAD_N0The lower size of the vectors to load. Supported: 1-16 and <= N0
[in]DATA_TYPEThe data type of the vectors
[in]BASENAMEThe basename of the variables
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe offset in z-axis direction
#define LOAD_ROW_PARTIAL_1(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_2(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_3(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_4(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_5(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_6(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_7(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_8(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_9(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_10(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_11(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_12(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_13(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_14(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_15(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_ROW_PARTIAL_16(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
LOAD_BLOCK_PARTIAL

Partially load a block of the given size LOAD_M0xLOAD_N0

Note
The vector width N0 is also required for correct partial storing behaviour.
in case LOAD_N0 != 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring small performance penalty.

The data to load is expected to have consecutive names for each row. E.g., for LOAD_M0=3 and basename=c, the expected names are c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for LOAD_M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.

Parameters
[in]LOAD_M0The number of rows to load. Supported: 1-16
[in]LOAD_N0The lower number of elements of vectors to load. Supported: 1-16 and <= N0
[in]N0The size of each vector. Supported: 1, 2, 3, 4, 8, 16
[in]DATA_TYPEThe data type of the vectors
[in]BASENAMEThe basename of the variables
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe offset in z-axis direction
#define LOAD_BLOCK_PARTIAL_STR(LOAD_M0, LOAD_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)   LOAD_ROW_PARTIAL_##LOAD_M0(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_BLOCK_PARTIAL(LOAD_M0, LOAD_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)   LOAD_BLOCK_PARTIAL_STR(LOAD_M0, LOAD_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
#define LOAD_BLOCK_PARTIAL_IN_X_AND_Y(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X)
 Load a block that can be partial in both x and y dimensions. More...
 
#define LOAD_BLOCK_PARTIAL_IN_X(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z, PARTIAL_STORE_N0, PARTIAL_COND_X)
 Load a block that can only be partial in x but not y. More...
 
#define LOAD_BLOCK_PARTIAL_IN_Y(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_COND_Y)
 Load a block that can only be partial in y but not x. More...
 
LOAD_BLOCK_BOUNDARY_AWARE

Boundary-aware GeMM block load

This macro assumes the following schemes to achieve boundary-awareness:

  • Overlapping load in Y axis from lhs tensor. This implies lhs has no padding along y dim.
  • Non-Overlapping(normal) load from rhs tensor. This implies rhs can have paddings.
  • Overlapping load in Y axis from bias tensor. This implies bias has no padding along y dim. The macro then ensures that the src tensor can be loaded without any paddings in both x and y dim.

In the y dimension, we place the partial blocks at the beginning while in the x dimension, we place the partial blocks at the end. Say, the src tensor is of shape MxN and we have M0 and N0 as the block size, this is how we define "partial blocks"/ "boundary block" (we use the 2 terms "partial blocks" and "boundary blocks" interchangeably) and its various parameters:

  *--x-->                         x == 0                        x == 1
  |                  |<------------------------------N-------------------------->|
  y                  |<--------------N0------------->|<----PARTIAL_STORE_N0----->|
  |     -------------#############################################################
  *     |          | |...............................|...........................|
 y == 0 | PAR_..._M0 |......Boundary block in y......|.Boundary block in x and y.|
        |          | |...............................|...........................|
        M          --#############################################################
        |          | |                               |...........................|
 y == 1 |         M0 |      Non-boundary block       |....Boundary block in x....|
        |          | |                               |...........................|
        |------------#############################################################

Then PARTIAL_STORE_M0 = M % M0 and PARTIAL_STORE_N0 = N % N0

Note
in cases PARTIAL_STORE_N0 != 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring small performance penalty.

It automatically detects if a given M, N, M0, N0 combination can yield partial blocks in either the X or the Y dimension, and selects the corresponding load methods such that the boundary detection logic is only added when needed.

The data to load is expected to have consecutive names for each row. E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.

The macro will result in a declaration of M0 vectors of size N0 with data type DATA_TYPE containing values partially loaded from the specified address in memory. The remaining (N0 - PARTIAL_STORE_N0) elements will be filled with zeros.

Parameters
[in]M0The number of rows to load, for non-partial blocks. Supported: 1-16
[in]N0The size of each vector, for non-partial blocks. Supported: 1, 2, 3, 4, 8, 16
[in]DATA_TYPEThe data type of the vectors
[in]BASENAMEThe basename of the variables
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
[in]ZThe offset in z-axis direction
[in]PARTIAL_STORE_M0The partial size in y, for partial blocks. Supported: [0, M0)
[in]PARTIAL_STORE_N0The partial size in x, for partial blocks. Supported: [0, N0)
[in]PARTIAL_COND_YCondition on the y axis to perform the partial load Y. True to use PARTIAL_STORE_M0 rather than M0.
[in]PARTIAL_COND_XCondition on the x axis to perform the partial load X. True to use PARTIAL_STORE_N0 rather than N0.
#define LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X)   LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
 
LOAD_TEXTURE2D_ROW_n

Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).

Parameters
[in]N0The number of pixels to read
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]IMGThe 2D OpenCL image object
[in]X_COORDThe x coordinate for the top-left pixel
[in]Y_COORDThe y coordinate for the top-left pixel
[in]X_STEP_ROWThe incremental step row for the x coordinate (in pixels)
[in]Y_STEP_ROWThe incremental step row for the y coordinate (in pixels)
#define LOAD_TEXTURE2D_ROW_1(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)   BASENAME##0 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 0 * X_STEP_ROW), (Y_COORD + 0 * Y_STEP_ROW))
 
#define LOAD_TEXTURE2D_ROW_2(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_3(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_4(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_5(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_6(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_7(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_8(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_9(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_10(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_11(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_12(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_13(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_14(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_15(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D_ROW_16(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
LOAD_TEXTURE2D

Load a 2D texture in unit of pixel.

A pixel is made of 4 floating point values

Supported cases are M0=1,2,3,...,16 and N0=1 The data to load is expected to have consecutive names for each row. E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.

Parameters
[in]M0The number of consecutive rows
[in]N0The number of consecutive pixels. Only 1, 2 and 4 are supported
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]IMGThe 2D OpenCL image object
[in]X_COORDThe x coordinate for the top-left pixel
[in]Y_COORDThe y coordinate for the top-left pixel
[in]X_STEP_ROWThe incremental step row for the x coordinate (in pixels)
[in]Y_STEP_ROWThe incremental step row for the y coordinate (in pixels)
#define LOAD_TEXTURE2D_STR(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)   LOAD_TEXTURE2D_ROW_##M0(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
#define LOAD_TEXTURE2D(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)   LOAD_TEXTURE2D_STR(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
 
LOAD_ROW_INDIRECT_n

Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1) passing the Y index for each row to be loaded.

Parameters
[in]N0The number of columns to load
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
[in]YThe y-axis offset vector
[in]Y_MASKThe y-axis mask vector. If 0, forces BASENAMEn to 0
#define LOAD_ROW_INDIRECT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_ROW_INDIRECT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
LOAD_BLOCK_INDIRECT

Load blocks (consecutive rows and columns) with Y offset.

Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16 The data to load is expected to have consecutive names for each row. E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.

Parameters
[in]M0The number of consecutive rows
[in]N0The number of consecutive columns
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]PTRThe base pointer for the data
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride in y-axis direction
[in]YThe y-axis offset vector
[in]Y_MASKThe y-axis mask vector. If 0, forces BASENAMEn to 0
#define LOAD_BLOCK_INDIRECT_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)   LOAD_ROW_INDIRECT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
#define LOAD_BLOCK_INDIRECT(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)   LOAD_BLOCK_INDIRECT_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
 
LOAD_ELEMENT_n

Loads the elements from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).

Parameters
[in]N0The number of rows to load
[in]DATA_TYPEThe data type of variables
[in]BASENAMEThe basename of the destination variables for the loaded rows
[in]PTRThe base pointer
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride value in y-axis direction
#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
LOAD_SCALAR_AS_VECTOR

Load Scalar as Vector (consecutive elements).

Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16 The data to load is expected to have consecutive names for each row. E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.

Parameters
[in]M0The number of consecutive rows
[in]N0The number of consecutive columns
[in]DATA_TYPEThe data type of the target
[in]BASENAMEThe basename of the result variables
[in]PTRThe base pointer for the data
[in]OFFSETThe offset within a row
[in]STRIDE_YThe stride in y-axis direction
#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)   LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)   LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
 
CALCULATE_Z_OFFSET_n

Basic macros to calculate Z offset values from Z0 to Zn-1

Parameters
[in]M0The number of offset values to calculate
[in]DATA_TYPEThe data type of the results
[in]ZThe basename of the result variables
[in]YThe work-item ID of y-axis
[in]HEIGHT_GEMM3DThe height of GEMM3D
[in]DEPTH_GEMM3DThe depth of GEMM3D
[in]CROSS_PLANE_PADThe padding required for plane changes across the z-dimension
[in]STRIDE_YThe stride value in y-axis direction
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
CALCULATE_Z_OFFSET

Calculate Z offset values from Z0 to Zn-1

The Z offsets are expected to have consecutive names.

E.g., for M0=3 and Z=zin, the expected names of Z offsets are zin0, zin1, zin2. Note that CROSS_PLANE_PAD (cross plane padding) is required to take into account the possible cross plane paddings in case of plane changes across the z-dimension.

Parameters
[in]M0The number of offset values to calculate
[in]DATA_TYPEThe data type of the results
[in]ZThe basename of the result variables
[in]YThe work-item ID of y-axis
[in]HEIGHT_GEMM3DThe height of GEMM3D
[in]DEPTH_GEMM3DThe depth of GEMM3D
[in]CROSS_PLANE_PADThe padding required for plane changes across the z-dimension
[in]STRIDE_YThe stride value in y-axis direction
#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)   CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)   CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
 
SCALE_ROW_n

Scale the rows in the given variables (BASENAME0 to BASENAMEn-1)

Parameters
[in]DATA_TYPEThe data type of the variables
[in]BASENAMEThe basename of the variables
[in]SCALEThe scale factor
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE)   BASENAME##0 *= (DATA_TYPE)SCALE;
 
#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE)
 
SCALE_BLOCK

Scale elements stored in a block (BASENAME)

Supported cases are N=1,2,3,...,16

Parameters
[in]NThe number of rows in the block
[in]DATA_TYPEThe data type of the block
[in]BASENAMEThe basename of the block
[in]SCALEThe scale factor
#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)   SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
 
#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE)   SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
 
COLUMN_VECTORn

Create a new vector containing the values at the given index for a set of given vectors

Parameters
[in]IDX_COLThe index value
[in]BASENAMEThe basename of the destination vectors
[in]XThe basename of the source vectors
[in]TYPEThe data type of the destination vectors
#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE)   TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
 
#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE)
 
COLUMN_VECTOR_SCALARn

Create a new vector containing the values at the given index.

Utility macros for transposing a column-vector

Parameters
[in] IDX_COL: The index value
[in] BASENAME: The basename of the destination vectors
[in] X: The basename of the source vectors
[in] TYPE: The data type of the destination vectors
#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE)   TYPE BASENAME##IDX_COL = (TYPE)((X##0));
 
#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE)
 
#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE)
 
TRANSPOSE_K0Xn

Create transposed vectors of the given vectors

Parameters
[in] K0: The size of the source vectors
[in] BASENAME: The basename of transposed vectors
[in] BS: The basename of source vectors for transposition
[in] TYPE: The data type of the transposed vectors
#define TRANSPOSE_K0X1(K0, BASENAME, BS, TYPE)   COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, BS, TYPE);
 
#define TRANSPOSE_K0X2(K0, BASENAME, BS, TYPE)
 
#define TRANSPOSE_K0X3(K0, BASENAME, BS, TYPE)
 
#define TRANSPOSE_K0X4(K0, BASENAME, BS, TYPE)
 
#define TRANSPOSE_K0X8(K0, BASENAME, BS, TYPE)
 
#define TRANSPOSE_K0X16(K0, BASENAME, BS, TYPE)
 
ADD_ROW_n

Add the variables (BIAS0 to BIASn-1) to the others (BASENAME0 to BASENAMEn-1)

Parameters
[in] BASENAME: The basename of the destination variables
[in] BIAS: The basename of the added variables
#define ADD_ROW_1(BASENAME, BIAS)   BASENAME##0 += BIAS##0;
 
#define ADD_ROW_2(BASENAME, BIAS)
 
#define ADD_ROW_3(BASENAME, BIAS)
 
#define ADD_ROW_4(BASENAME, BIAS)
 
#define ADD_ROW_5(BASENAME, BIAS)
 
#define ADD_ROW_6(BASENAME, BIAS)
 
#define ADD_ROW_7(BASENAME, BIAS)
 
#define ADD_ROW_8(BASENAME, BIAS)
 
#define ADD_ROW_9(BASENAME, BIAS)
 
#define ADD_ROW_10(BASENAME, BIAS)
 
#define ADD_ROW_11(BASENAME, BIAS)
 
#define ADD_ROW_12(BASENAME, BIAS)
 
#define ADD_ROW_13(BASENAME, BIAS)
 
#define ADD_ROW_14(BASENAME, BIAS)
 
#define ADD_ROW_15(BASENAME, BIAS)
 
#define ADD_ROW_16(BASENAME, BIAS)
 
ADD_BLOCK

Add the block (BIAS) to another block (BASENAME)

Supported cases are N=1,2,3,...,16

Parameters
[in] N: The number of vectors in the block
[in] BASENAME: The basename of the destination variables
[in] BIAS: The basename of the added variables
#define ADD_BLOCK_STR(N, BASENAME, BIAS)   ADD_ROW_##N(BASENAME, BIAS)
 
#define ADD_BLOCK(N, BASENAME, BIAS)   ADD_BLOCK_STR(N, BASENAME, BIAS)
 
ADD_ROW_BROADCAST_n

Broadcast (add a single value) to each element of the destination variables

Parameters
[in] BASENAME: The basename of the destination variables
[in] BIAS: The variable containing the value to add
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS)   BASENAME##0 += BIAS;
 
#define ADD_ROW_BROADCAST_2(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_3(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_4(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_5(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_6(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_7(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_8(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_9(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_10(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_11(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_12(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_13(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_14(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_15(BASENAME, BIAS)
 
#define ADD_ROW_BROADCAST_16(BASENAME, BIAS)
 
ADD_BLOCK_BROADCAST

Broadcast (add a value) to each element of the destination block (BASENAME)

Supported cases are N=1,2,3,...,16.

Parameters
[in] N: The number of vectors in the block
[in] BASENAME: The basename of the destination variables
[in] BIAS: The variable containing the value to add
#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)   ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
 
#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS)   ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
 
ACTIVATION_ROW_n

Apply activation to the given variables

Parameters
[in] ACTIVATION_TYPE: The type of the activation
[in] DATA_TYPE: The data type of the vectors
[in] VEC_SIZE: The vector size forwarded to ACTIVATION
[in] BASENAME: The basename of the variables
[in] A_VAL: Additional value required by the activation
[in] B_VAL: Additional value required by the activation
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)   BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##0, A_VAL, B_VAL);
 
#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
ACTIVATION_BLOCK

Apply activation to a block (BASENAME)

Supported cases are N=1,2,3,...,16.

Parameters
[in] N: The number of vectors in the block
[in] ACTIVATION_TYPE: The type of the activation
[in] DATA_TYPE: The data type of the vectors
[in] VEC_SIZE: The vector size forwarded to ACTIVATION
[in] BASENAME: The basename of the variables
[in] A_VAL: Additional value required by the activation
[in] B_VAL: Additional value required by the activation
#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)   ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)   ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
 
CONVERT_ROW_n

Apply convert_<data_type> to the given variables

Parameters
[in] N: The size of the vectors
[in] DATA_TYPE: The data type of the vectors
[in] BASENAME_SRC: The basename of the source variables
[in] BASENAME_DST: The basename of the destination variables
#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
CONVERT_BLOCK

Apply convert_<data_type> to a block (BASENAME_SRC) and save to another block (BASENAME_DST)

Supported cases are N=1,2,3,...,16.

Parameters
[in] M: The number of vectors to convert
[in] N: The size of the vectors
[in] DATA_TYPE: The data type of the vectors
[in] BASENAME_SRC: The basename of the source variables
[in] BASENAME_DST: The basename of the destination variables
#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)   CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 
#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)   CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
 

Macro Definition Documentation

◆ ACTIVATION_BLOCK

#define ACTIVATION_BLOCK (   N,
  ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)    ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)

Definition at line 1667 of file gemm_helpers.h.

◆ ACTIVATION_BLOCK_STR

#define ACTIVATION_BLOCK_STR (   N,
  ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)    ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)

Definition at line 1666 of file gemm_helpers.h.

◆ ACTIVATION_ROW_1

#define ACTIVATION_ROW_1 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)    BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##0, A_VAL, B_VAL);

Definition at line 1589 of file gemm_helpers.h.

◆ ACTIVATION_ROW_10

#define ACTIVATION_ROW_10 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##9, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1624 of file gemm_helpers.h.

◆ ACTIVATION_ROW_11

#define ACTIVATION_ROW_11 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##A, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1628 of file gemm_helpers.h.

◆ ACTIVATION_ROW_12

#define ACTIVATION_ROW_12 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##B, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1632 of file gemm_helpers.h.

◆ ACTIVATION_ROW_13

#define ACTIVATION_ROW_13 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##C, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1636 of file gemm_helpers.h.

◆ ACTIVATION_ROW_14

#define ACTIVATION_ROW_14 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##D, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1640 of file gemm_helpers.h.

◆ ACTIVATION_ROW_15

#define ACTIVATION_ROW_15 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##E, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1644 of file gemm_helpers.h.

◆ ACTIVATION_ROW_16

#define ACTIVATION_ROW_16 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##F, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1648 of file gemm_helpers.h.

◆ ACTIVATION_ROW_2

#define ACTIVATION_ROW_2 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##1, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1592 of file gemm_helpers.h.

◆ ACTIVATION_ROW_3

#define ACTIVATION_ROW_3 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##2, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1596 of file gemm_helpers.h.

◆ ACTIVATION_ROW_4

#define ACTIVATION_ROW_4 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##3, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1600 of file gemm_helpers.h.

◆ ACTIVATION_ROW_5

#define ACTIVATION_ROW_5 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##4, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1604 of file gemm_helpers.h.

◆ ACTIVATION_ROW_6

#define ACTIVATION_ROW_6 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##5, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1608 of file gemm_helpers.h.

◆ ACTIVATION_ROW_7

#define ACTIVATION_ROW_7 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##6, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1612 of file gemm_helpers.h.

◆ ACTIVATION_ROW_8

#define ACTIVATION_ROW_8 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##7, A_VAL, B_VAL);
#define VEC_SIZE
#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1616 of file gemm_helpers.h.

◆ ACTIVATION_ROW_9

#define ACTIVATION_ROW_9 (   ACTIVATION_TYPE,
  DATA_TYPE,
  VEC_SIZE,
  BASENAME,
  A_VAL,
  B_VAL 
)
Value:
ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL) \
BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME##8, A_VAL, B_VAL);
#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, BASENAME, A_VAL, B_VAL)
#define VEC_SIZE
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Definition at line 1620 of file gemm_helpers.h.

◆ ADD_BLOCK

#define ADD_BLOCK (   N,
  BASENAME,
  BIAS 
)    ADD_BLOCK_STR(N, BASENAME, BIAS)

Definition at line 1492 of file gemm_helpers.h.

◆ ADD_BLOCK_BROADCAST

#define ADD_BLOCK_BROADCAST (   N,
  BASENAME,
  BIAS 
)    ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)

Definition at line 1576 of file gemm_helpers.h.

◆ ADD_BLOCK_BROADCAST_STR

#define ADD_BLOCK_BROADCAST_STR (   N,
  BASENAME,
  BIAS 
)    ADD_ROW_BROADCAST_##N(BASENAME, BIAS)

Definition at line 1575 of file gemm_helpers.h.

◆ ADD_BLOCK_STR

#define ADD_BLOCK_STR (   N,
  BASENAME,
  BIAS 
)    ADD_ROW_##N(BASENAME, BIAS)

Definition at line 1491 of file gemm_helpers.h.

◆ ADD_ROW_1

#define ADD_ROW_1 (   BASENAME,
  BIAS 
)    BASENAME##0 += BIAS##0;

Definition at line 1416 of file gemm_helpers.h.

◆ ADD_ROW_10

#define ADD_ROW_10 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_9(BASENAME, BIAS) \
BASENAME##9 += BIAS##9;
#define ADD_ROW_9(BASENAME, BIAS)

Definition at line 1451 of file gemm_helpers.h.

◆ ADD_ROW_11

#define ADD_ROW_11 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_10(BASENAME, BIAS) \
BASENAME##A += BIAS##A;
#define ADD_ROW_10(BASENAME, BIAS)

Definition at line 1455 of file gemm_helpers.h.

◆ ADD_ROW_12

#define ADD_ROW_12 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_11(BASENAME, BIAS) \
BASENAME##B += BIAS##B;
#define ADD_ROW_11(BASENAME, BIAS)

Definition at line 1459 of file gemm_helpers.h.

◆ ADD_ROW_13

#define ADD_ROW_13 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_12(BASENAME, BIAS) \
BASENAME##C += BIAS##C;
#define ADD_ROW_12(BASENAME, BIAS)

Definition at line 1463 of file gemm_helpers.h.

◆ ADD_ROW_14

#define ADD_ROW_14 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_13(BASENAME, BIAS) \
BASENAME##D += BIAS##D;
#define ADD_ROW_13(BASENAME, BIAS)

Definition at line 1467 of file gemm_helpers.h.

◆ ADD_ROW_15

#define ADD_ROW_15 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_14(BASENAME, BIAS) \
BASENAME##E += BIAS##E;
#define ADD_ROW_14(BASENAME, BIAS)

Definition at line 1471 of file gemm_helpers.h.

◆ ADD_ROW_16

#define ADD_ROW_16 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_15(BASENAME, BIAS) \
BASENAME##F += BIAS##F;
#define ADD_ROW_15(BASENAME, BIAS)

Definition at line 1475 of file gemm_helpers.h.

◆ ADD_ROW_2

#define ADD_ROW_2 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_1(BASENAME, BIAS) \
BASENAME##1 += BIAS##1;
#define ADD_ROW_1(BASENAME, BIAS)

Definition at line 1419 of file gemm_helpers.h.

◆ ADD_ROW_3

#define ADD_ROW_3 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_2(BASENAME, BIAS) \
BASENAME##2 += BIAS##2;
#define ADD_ROW_2(BASENAME, BIAS)

Definition at line 1423 of file gemm_helpers.h.

◆ ADD_ROW_4

#define ADD_ROW_4 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_3(BASENAME, BIAS) \
BASENAME##3 += BIAS##3;
#define ADD_ROW_3(BASENAME, BIAS)

Definition at line 1427 of file gemm_helpers.h.

◆ ADD_ROW_5

#define ADD_ROW_5 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_4(BASENAME, BIAS) \
BASENAME##4 += BIAS##4;
#define ADD_ROW_4(BASENAME, BIAS)

Definition at line 1431 of file gemm_helpers.h.

◆ ADD_ROW_6

#define ADD_ROW_6 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_5(BASENAME, BIAS) \
BASENAME##5 += BIAS##5;
#define ADD_ROW_5(BASENAME, BIAS)

Definition at line 1435 of file gemm_helpers.h.

◆ ADD_ROW_7

#define ADD_ROW_7 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_6(BASENAME, BIAS) \
BASENAME##6 += BIAS##6;
#define ADD_ROW_6(BASENAME, BIAS)

Definition at line 1439 of file gemm_helpers.h.

◆ ADD_ROW_8

#define ADD_ROW_8 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_7(BASENAME, BIAS) \
BASENAME##7 += BIAS##7;
#define ADD_ROW_7(BASENAME, BIAS)

Definition at line 1443 of file gemm_helpers.h.

◆ ADD_ROW_9

#define ADD_ROW_9 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_8(BASENAME, BIAS) \
BASENAME##8 += BIAS##8;
#define ADD_ROW_8(BASENAME, BIAS)

Definition at line 1447 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_1

#define ADD_ROW_BROADCAST_1 (   BASENAME,
  BIAS 
)    BASENAME##0 += BIAS;

Definition at line 1502 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_10

#define ADD_ROW_BROADCAST_10 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
BASENAME##9 += BIAS;
#define ADD_ROW_BROADCAST_9(BASENAME, BIAS)

Definition at line 1537 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_11

#define ADD_ROW_BROADCAST_11 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
BASENAME##A += BIAS;
#define ADD_ROW_BROADCAST_10(BASENAME, BIAS)

Definition at line 1541 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_12

#define ADD_ROW_BROADCAST_12 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
BASENAME##B += BIAS;
#define ADD_ROW_BROADCAST_11(BASENAME, BIAS)

Definition at line 1545 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_13

#define ADD_ROW_BROADCAST_13 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
BASENAME##C += BIAS;
#define ADD_ROW_BROADCAST_12(BASENAME, BIAS)

Definition at line 1549 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_14

#define ADD_ROW_BROADCAST_14 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
BASENAME##D += BIAS;
#define ADD_ROW_BROADCAST_13(BASENAME, BIAS)

Definition at line 1553 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_15

#define ADD_ROW_BROADCAST_15 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
BASENAME##E += BIAS;
#define ADD_ROW_BROADCAST_14(BASENAME, BIAS)

Definition at line 1557 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_16

#define ADD_ROW_BROADCAST_16 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
BASENAME##F += BIAS;
#define ADD_ROW_BROADCAST_15(BASENAME, BIAS)

Definition at line 1561 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_2

#define ADD_ROW_BROADCAST_2 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
BASENAME##1 += BIAS;
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS)

Definition at line 1505 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_3

#define ADD_ROW_BROADCAST_3 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
BASENAME##2 += BIAS;
#define ADD_ROW_BROADCAST_2(BASENAME, BIAS)

Definition at line 1509 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_4

#define ADD_ROW_BROADCAST_4 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
BASENAME##3 += BIAS;
#define ADD_ROW_BROADCAST_3(BASENAME, BIAS)

Definition at line 1513 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_5

#define ADD_ROW_BROADCAST_5 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
BASENAME##4 += BIAS;
#define ADD_ROW_BROADCAST_4(BASENAME, BIAS)

Definition at line 1517 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_6

#define ADD_ROW_BROADCAST_6 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
BASENAME##5 += BIAS;
#define ADD_ROW_BROADCAST_5(BASENAME, BIAS)

Definition at line 1521 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_7

#define ADD_ROW_BROADCAST_7 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
BASENAME##6 += BIAS;
#define ADD_ROW_BROADCAST_6(BASENAME, BIAS)

Definition at line 1525 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_8

#define ADD_ROW_BROADCAST_8 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
BASENAME##7 += BIAS;
#define ADD_ROW_BROADCAST_7(BASENAME, BIAS)

Definition at line 1529 of file gemm_helpers.h.

◆ ADD_ROW_BROADCAST_9

#define ADD_ROW_BROADCAST_9 (   BASENAME,
  BIAS 
)
Value:
ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
BASENAME##8 += BIAS;
#define ADD_ROW_BROADCAST_8(BASENAME, BIAS)

Definition at line 1533 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET

#define CALCULATE_Z_OFFSET (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)    CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1187 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_1

#define CALCULATE_Z_OFFSET_1 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
Z##0 = (0 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##0 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##0); \
Z##0 *= (CROSS_PLANE_PAD * STRIDE_Y);

Definition at line 1105 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_2

#define CALCULATE_Z_OFFSET_2 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##1 = (1 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##1 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##1); \
Z##1 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1110 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_3

#define CALCULATE_Z_OFFSET_3 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##2 = (2 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##2 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##2); \
Z##2 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1116 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_4

#define CALCULATE_Z_OFFSET_4 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##3 = (3 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##3 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##3); \
Z##3 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1122 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_5

#define CALCULATE_Z_OFFSET_5 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##4 = (4 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##4 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##4); \
Z##4 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1128 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_6

#define CALCULATE_Z_OFFSET_6 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##5 = (5 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##5 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##5); \
Z##5 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1134 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_7

#define CALCULATE_Z_OFFSET_7 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##6 = (6 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##6 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##6); \
Z##6 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1140 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_8

#define CALCULATE_Z_OFFSET_8 (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)
Value:
CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
Z##7 = (7 + (DATA_TYPE)(Y)) / (DATA_TYPE)HEIGHT_GEMM3D; \
Z##7 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##7); \
Z##7 *= (CROSS_PLANE_PAD * STRIDE_Y);
#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1146 of file gemm_helpers.h.

◆ CALCULATE_Z_OFFSET_STR

#define CALCULATE_Z_OFFSET_STR (   M0,
  DATA_TYPE,
  Z,
  Y,
  HEIGHT_GEMM3D,
  DEPTH_GEMM3D,
  CROSS_PLANE_PAD,
  STRIDE_Y 
)    CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)

Definition at line 1186 of file gemm_helpers.h.

◆ COLUMN_VECTOR

#define COLUMN_VECTOR (   K0,
  IDX_COL,
  BASENAME,
  BS,
  TYPE 
)
Value:
CONCAT(COLUMN_VECTOR, K0) \
(IDX_COL, BASENAME, BS, TYPE);
#define TYPE
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, BS, TYPE)
Create column vectors to contain the values at the given index for a set of given vectors...
#define CONCAT(a, b)
Concatenate two inputs.
Definition: helpers.h:56

Create column vectors to contain the values at the given index for a set of given vectors.

Parameters
[in] K0: The number of source vectors
[in] IDX_COL: The index value
[in] BASENAME: The basename of the destination vectors
[in] BS: The basename of the source vectors
[in] TYPE: The data type of the destination vectors

Definition at line 1380 of file gemm_helpers.h.

◆ COLUMN_VECTOR1

#define COLUMN_VECTOR1 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)    TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);

Definition at line 1286 of file gemm_helpers.h.

◆ COLUMN_VECTOR16

#define COLUMN_VECTOR16 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
VEC_DATA_TYPE(TYPE, 16) \
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, (X##F).s##IDX_COL);
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1300 of file gemm_helpers.h.

◆ COLUMN_VECTOR2

#define COLUMN_VECTOR2 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
VEC_DATA_TYPE(TYPE, 2) \
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1288 of file gemm_helpers.h.

◆ COLUMN_VECTOR3

#define COLUMN_VECTOR3 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
VEC_DATA_TYPE(TYPE, 3) \
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1291 of file gemm_helpers.h.

◆ COLUMN_VECTOR4

#define COLUMN_VECTOR4 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
VEC_DATA_TYPE(TYPE, 4) \
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL);
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1294 of file gemm_helpers.h.

◆ COLUMN_VECTOR8

#define COLUMN_VECTOR8 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1297 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR

#define COLUMN_VECTOR_SCALAR (   K0,
  IDX_COL,
  BASENAME,
  BS,
  TYPE 
)
Value:
CONCAT(COLUMN_VECTOR_SCALAR, K0) \
(IDX_COL, BASENAME, BS, TYPE);
#define TYPE
#define CONCAT(a, b)
Concatenate two inputs.
Definition: helpers.h:56
#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, BS, TYPE)
Create column vectors to contain the values at the given index.

Create column vectors to contain the values at the given index.

Utility macro for transposing a column-vector

Parameters
[in] K0: The number of source vectors
[in] IDX_COL: The index value
[in] BASENAME: The basename of the destination vectors
[in] BS: The basename of the source vectors
[in] TYPE: The data type of the destination vectors

Definition at line 1392 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR1

#define COLUMN_VECTOR_SCALAR1 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)    TYPE BASENAME##IDX_COL = (TYPE)((X##0));

Definition at line 1314 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR16

#define COLUMN_VECTOR_SCALAR16 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1328 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR2

#define COLUMN_VECTOR_SCALAR2 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1316 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR3

#define COLUMN_VECTOR_SCALAR3 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1319 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR4

#define COLUMN_VECTOR_SCALAR4 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1322 of file gemm_helpers.h.

◆ COLUMN_VECTOR_SCALAR8

#define COLUMN_VECTOR_SCALAR8 (   IDX_COL,
  BASENAME,
  X,
  TYPE 
)
Value:
BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
#define TYPE
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1325 of file gemm_helpers.h.

◆ CONVERT_BLOCK

#define CONVERT_BLOCK (   M,
  N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)    CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)

Definition at line 1770 of file gemm_helpers.h.

◆ CONVERT_BLOCK_STR

#define CONVERT_BLOCK_STR (   M,
  N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)    CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)

Definition at line 1769 of file gemm_helpers.h.

◆ CONVERT_ROW_1

#define CONVERT_ROW_1 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1678 of file gemm_helpers.h.

◆ CONVERT_ROW_10

#define CONVERT_ROW_10 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1722 of file gemm_helpers.h.

◆ CONVERT_ROW_11

#define CONVERT_ROW_11 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1727 of file gemm_helpers.h.

◆ CONVERT_ROW_12

#define CONVERT_ROW_12 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1732 of file gemm_helpers.h.

◆ CONVERT_ROW_13

#define CONVERT_ROW_13 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1737 of file gemm_helpers.h.

◆ CONVERT_ROW_14

#define CONVERT_ROW_14 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1742 of file gemm_helpers.h.

◆ CONVERT_ROW_15

#define CONVERT_ROW_15 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1747 of file gemm_helpers.h.

◆ CONVERT_ROW_16

#define CONVERT_ROW_16 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1752 of file gemm_helpers.h.

◆ CONVERT_ROW_2

#define CONVERT_ROW_2 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1682 of file gemm_helpers.h.

◆ CONVERT_ROW_3

#define CONVERT_ROW_3 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1687 of file gemm_helpers.h.

◆ CONVERT_ROW_4

#define CONVERT_ROW_4 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1692 of file gemm_helpers.h.

◆ CONVERT_ROW_5

#define CONVERT_ROW_5 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1697 of file gemm_helpers.h.

◆ CONVERT_ROW_6

#define CONVERT_ROW_6 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1702 of file gemm_helpers.h.

◆ CONVERT_ROW_7

#define CONVERT_ROW_7 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728
#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)

Definition at line 1707 of file gemm_helpers.h.

◆ CONVERT_ROW_8

#define CONVERT_ROW_8 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
unsigned int N
#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1712 of file gemm_helpers.h.

◆ CONVERT_ROW_9

#define CONVERT_ROW_9 (   N,
  DATA_TYPE,
  BASENAME_SRC,
  BASENAME_DST 
)
Value:
CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
VEC_DATA_TYPE(DATA_TYPE, N) \
BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));
#define CONVERT(x, type)
Definition: helpers.h:731
#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
unsigned int N
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 1717 of file gemm_helpers.h.

◆ LOAD_BLOCK

#define LOAD_BLOCK (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)    LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

Definition at line 398 of file gemm_helpers.h.

◆ LOAD_BLOCK_BOUNDARY_AWARE

#define LOAD_BLOCK_BOUNDARY_AWARE (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z,
  PARTIAL_STORE_M0,
  PARTIAL_STORE_N0,
  PARTIAL_COND_Y,
  PARTIAL_COND_X 
)    LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

Definition at line 678 of file gemm_helpers.h.

◆ LOAD_BLOCK_INDIRECT

#define LOAD_BLOCK_INDIRECT (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)    LOAD_BLOCK_INDIRECT_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)

Definition at line 977 of file gemm_helpers.h.

◆ LOAD_BLOCK_INDIRECT_STR

#define LOAD_BLOCK_INDIRECT_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)    LOAD_ROW_INDIRECT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)

Definition at line 976 of file gemm_helpers.h.

◆ LOAD_BLOCK_PARTIAL

#define LOAD_BLOCK_PARTIAL (   LOAD_M0,
  LOAD_N0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)    LOAD_BLOCK_PARTIAL_STR(LOAD_M0, LOAD_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

Definition at line 520 of file gemm_helpers.h.

◆ LOAD_BLOCK_PARTIAL_IN_X

#define LOAD_BLOCK_PARTIAL_IN_X (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z,
  PARTIAL_STORE_N0,
  PARTIAL_COND_X 
)
Value:
if(!(PARTIAL_COND_X)) \
{ \
LOAD_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
} \
else \
{ \
LOAD_BLOCK_PARTIAL(M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
}
#define PARTIAL_STORE_N0
Definition: transpose.cl:25

Load a block that can only be partial in x but not y.

Note
In case N0 or PARTIAL_STORE_N0 is not one of 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring a small performance penalty.

The data to load is expected to have consecutive names for each row. E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.

Parameters
[in] M0: The number of rows to load, for non-partial blocks. Supported: 1-16
[in] N0: The size of each vector, for non-partial blocks. Supported: 1, 2, 3, 4, 8, 16
[in] DATA_TYPE: The data type of the vectors
[in] BASENAME: The basename of the variables
[in] PTR: The base pointer
[in] OFFSET: The offset within a row
[in] STRIDE_Y: The stride value in y-axis direction
[in] Z: The offset in z-axis direction
[in] PARTIAL_STORE_N0: The partial size in x, for partial blocks. Supported range: [1, N0)
[in] PARTIAL_COND_X: Condition on the x axis to perform the partial load X. True to use PARTIAL_STORE_N0 rather than N0.

Definition at line 580 of file gemm_helpers.h.

◆ LOAD_BLOCK_PARTIAL_IN_X_AND_Y

#define LOAD_BLOCK_PARTIAL_IN_X_AND_Y (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z,
  PARTIAL_STORE_M0,
  PARTIAL_STORE_N0,
  PARTIAL_COND_Y,
  PARTIAL_COND_X 
)
Value:
if(!(PARTIAL_COND_X) && !(PARTIAL_COND_Y)) \
{ \
LOAD_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
} \
else if((PARTIAL_COND_Y) && !(PARTIAL_COND_X)) \
{ \
LOAD_BLOCK_PARTIAL(PARTIAL_STORE_M0, N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
} \
else if(!(PARTIAL_COND_Y) && (PARTIAL_COND_X)) \
{ \
LOAD_BLOCK_PARTIAL(M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
} \
else \
{ \
LOAD_BLOCK_PARTIAL(PARTIAL_STORE_M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
}
#define PARTIAL_STORE_M0
Definition: transpose.cl:24
#define PARTIAL_STORE_N0
Definition: transpose.cl:25

Load a block that can be partial in both x and y dimensions.

Note
In case PARTIAL_STORE_N0 is not one of 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring a small performance penalty.

The data to load is expected to have consecutive names for each row. E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.

Parameters
[in] M0: The number of rows to load, for non-partial blocks. Supported: 1-16
[in] N0: The size of each vector, for non-partial blocks. Supported: 1, 2, 3, 4, 8, 16
[in] DATA_TYPE: The data type of the vectors
[in] BASENAME: The basename of the variables
[in] PTR: The base pointer
[in] OFFSET: The offset within a row
[in] STRIDE_Y: The stride value in y-axis direction
[in] Z: The offset in z-axis direction
[in] PARTIAL_STORE_M0: The partial size in y, for partial blocks. Supported range: [1, M0)
[in] PARTIAL_STORE_N0: The partial size in x, for partial blocks. Supported range: [1, N0)
[in] PARTIAL_COND_Y: Condition on the y axis to perform the partial load Y. True to use PARTIAL_STORE_M0 rather than M0.
[in] PARTIAL_COND_X: Condition on the x axis to perform the partial load X. True to use PARTIAL_STORE_N0 rather than N0.

Definition at line 543 of file gemm_helpers.h.

◆ LOAD_BLOCK_PARTIAL_IN_Y

#define LOAD_BLOCK_PARTIAL_IN_Y (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z,
  PARTIAL_STORE_M0,
  PARTIAL_COND_Y 
)
Value:
if(!(PARTIAL_COND_Y)) \
{ \
LOAD_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
} \
else \
{ \
LOAD_BLOCK_PARTIAL(PARTIAL_STORE_M0, N0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z); \
}
#define PARTIAL_STORE_M0
Definition: transpose.cl:24

Load a block that can only be partial in y but not x.

Note
In case N0 is not one of 1, 2, 3, 4, 8, 16, extra vload(s) will be invoked, thus incurring a small performance penalty. (This macro takes no PARTIAL_STORE_N0 parameter; both branches load rows of width N0.)

The data to load is expected to have consecutive names for each row. E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2. The Z offset is expected to have consecutive names. E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.

Parameters
[in] M0: The number of rows to load, for non-partial blocks. Supported: 1-16
[in] N0: The size of each vector, for non-partial blocks. Supported: 1, 2, 3, 4, 8, 16
[in] DATA_TYPE: The data type of the vectors
[in] BASENAME: The basename of the variables
[in] PTR: The base pointer
[in] OFFSET: The offset within a row
[in] STRIDE_Y: The stride value in y-axis direction
[in] Z: The offset in z-axis direction
[in] PARTIAL_STORE_M0: The partial size in y, for partial blocks. Supported range: [1, M0)
[in] PARTIAL_COND_Y: Condition on the y axis to perform the partial load Y. True to use PARTIAL_STORE_M0 rather than M0.

Definition at line 609 of file gemm_helpers.h.

◆ LOAD_BLOCK_PARTIAL_STR

#define LOAD_BLOCK_PARTIAL_STR (   LOAD_M0,
  LOAD_N0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)    LOAD_ROW_PARTIAL_##LOAD_M0(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

Definition at line 519 of file gemm_helpers.h.

◆ LOAD_BLOCK_STR

#define LOAD_BLOCK_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)    LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

Definition at line 397 of file gemm_helpers.h.

◆ LOAD_ELEMENT_1

#define LOAD_ELEMENT_1 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##0 = *((__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y));
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 990 of file gemm_helpers.h.

◆ LOAD_ELEMENT_10

#define LOAD_ELEMENT_10 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##9 = *((__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y));
#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1034 of file gemm_helpers.h.

◆ LOAD_ELEMENT_11

#define LOAD_ELEMENT_11 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##A = *((__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y));
#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1039 of file gemm_helpers.h.

◆ LOAD_ELEMENT_12

#define LOAD_ELEMENT_12 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##B = *((__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y));
#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1044 of file gemm_helpers.h.

◆ LOAD_ELEMENT_13

#define LOAD_ELEMENT_13 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##C = *((__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y));
#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1049 of file gemm_helpers.h.

◆ LOAD_ELEMENT_14

#define LOAD_ELEMENT_14 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##D = *((__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y));
#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1054 of file gemm_helpers.h.

◆ LOAD_ELEMENT_15

#define LOAD_ELEMENT_15 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##E = *((__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y));
#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1059 of file gemm_helpers.h.

◆ LOAD_ELEMENT_16

#define LOAD_ELEMENT_16 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##F = *((__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y));
#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1064 of file gemm_helpers.h.

◆ LOAD_ELEMENT_2

#define LOAD_ELEMENT_2 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##1 = *((__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y));
#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
Definition: gemm_helpers.h:990

Definition at line 994 of file gemm_helpers.h.

◆ LOAD_ELEMENT_3

#define LOAD_ELEMENT_3 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##2 = *((__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y));
#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
Definition: gemm_helpers.h:994

Definition at line 999 of file gemm_helpers.h.

◆ LOAD_ELEMENT_4

#define LOAD_ELEMENT_4 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##3 = *((__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y));
#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
Definition: gemm_helpers.h:999

Definition at line 1004 of file gemm_helpers.h.

◆ LOAD_ELEMENT_5

#define LOAD_ELEMENT_5 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##4 = *((__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y));
#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1009 of file gemm_helpers.h.

◆ LOAD_ELEMENT_6

#define LOAD_ELEMENT_6 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##5 = *((__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y));
#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1014 of file gemm_helpers.h.

◆ LOAD_ELEMENT_7

#define LOAD_ELEMENT_7 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##6 = *((__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y));
#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1019 of file gemm_helpers.h.

◆ LOAD_ELEMENT_8

#define LOAD_ELEMENT_8 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##7 = *((__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y));
#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1024 of file gemm_helpers.h.

◆ LOAD_ELEMENT_9

#define LOAD_ELEMENT_9 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)
Value:
LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##8 = *((__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y));
#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1029 of file gemm_helpers.h.

◆ LOAD_ROW_1

#define LOAD_ROW_1 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y + Z##0));
#define VLOAD(size)
Definition: helpers.h:204
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 297 of file gemm_helpers.h.

◆ LOAD_ROW_10

#define LOAD_ROW_10 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y + Z##9));
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:336

Definition at line 341 of file gemm_helpers.h.

◆ LOAD_ROW_11

#define LOAD_ROW_11 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y + Z##A));
#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:341
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 346 of file gemm_helpers.h.

◆ LOAD_ROW_12

#define LOAD_ROW_12 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y + Z##B));
#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:346
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 351 of file gemm_helpers.h.

◆ LOAD_ROW_13

#define LOAD_ROW_13 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y + Z##C));
#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:351
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 356 of file gemm_helpers.h.

◆ LOAD_ROW_14

#define LOAD_ROW_14 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y + Z##D));
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:356

Definition at line 361 of file gemm_helpers.h.

◆ LOAD_ROW_15

#define LOAD_ROW_15 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y + Z##E));
#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:361
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 366 of file gemm_helpers.h.

◆ LOAD_ROW_16

#define LOAD_ROW_16 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y + Z##F));
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:366

Definition at line 371 of file gemm_helpers.h.

◆ LOAD_ROW_2

#define LOAD_ROW_2 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y + Z##1));
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:297
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 301 of file gemm_helpers.h.

◆ LOAD_ROW_3

#define LOAD_ROW_3 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y + Z##2));
#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:301
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 306 of file gemm_helpers.h.

◆ LOAD_ROW_4

#define LOAD_ROW_4 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y + Z##3));
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:306

Definition at line 311 of file gemm_helpers.h.

◆ LOAD_ROW_5

#define LOAD_ROW_5 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y + Z##4));
#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:311
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 316 of file gemm_helpers.h.

◆ LOAD_ROW_6

#define LOAD_ROW_6 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y + Z##5));
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:316

Definition at line 321 of file gemm_helpers.h.

◆ LOAD_ROW_7

#define LOAD_ROW_7 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z
)
Value:
LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y + Z##6));
#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:321
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 326 of file gemm_helpers.h.

◆ LOAD_ROW_8

#define LOAD_ROW_8 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y + Z##7));
#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:326
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 331 of file gemm_helpers.h.

◆ LOAD_ROW_9

#define LOAD_ROW_9 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y + Z##8));
#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:331
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 336 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_1

#define LOAD_ROW_INDIRECT_1 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##0; \
if(Y_MASK##0 != 0) \
BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##0 * STRIDE_Y)); \
else \
BASENAME##0 = 0;
#define VLOAD(size)
Definition: helpers.h:204
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Definition at line 813 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_10

#define LOAD_ROW_INDIRECT_10 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##9; \
if(Y_MASK##9 != 0) \
BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##9 * STRIDE_Y)); \
else \
BASENAME##9 = 0;
#define LOAD_ROW_INDIRECT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:884
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 893 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_11

#define LOAD_ROW_INDIRECT_11 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##A; \
if(Y_MASK##A != 0) \
BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##A * STRIDE_Y)); \
else \
BASENAME##A = 0;
#define LOAD_ROW_INDIRECT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:893
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 902 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_12

#define LOAD_ROW_INDIRECT_12 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##B; \
if(Y_MASK##B != 0) \
BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##B * STRIDE_Y)); \
else \
BASENAME##B = 0;
#define LOAD_ROW_INDIRECT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:902
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 911 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_13

#define LOAD_ROW_INDIRECT_13 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##C; \
if(Y_MASK##C != 0) \
BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##C * STRIDE_Y)); \
else \
BASENAME##C = 0;
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_INDIRECT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:911

Definition at line 920 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_14

#define LOAD_ROW_INDIRECT_14 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##D; \
if(Y_MASK##D != 0) \
BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##D * STRIDE_Y)); \
else \
BASENAME##D = 0;
#define LOAD_ROW_INDIRECT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:920
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 929 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_15

#define LOAD_ROW_INDIRECT_15 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##E; \
if(Y_MASK##E != 0) \
BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##E * STRIDE_Y)); \
else \
BASENAME##E = 0;
#define LOAD_ROW_INDIRECT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:929
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 938 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_16

#define LOAD_ROW_INDIRECT_16 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##F; \
if(Y_MASK##F != 0) \
BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##F * STRIDE_Y)); \
else \
BASENAME##F = 0;
#define LOAD_ROW_INDIRECT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:938
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 947 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_2

#define LOAD_ROW_INDIRECT_2 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##1; \
if(Y_MASK##1 != 0) \
BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##1 * STRIDE_Y)); \
else \
BASENAME##1 = 0;
#define LOAD_ROW_INDIRECT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:813
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 821 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_3

#define LOAD_ROW_INDIRECT_3 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##2; \
if(Y_MASK##2 != 0) \
BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##2 * STRIDE_Y)); \
else \
BASENAME##2 = 0;
#define VLOAD(size)
Definition: helpers.h:204
#define LOAD_ROW_INDIRECT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:821

Definition at line 830 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_4

#define LOAD_ROW_INDIRECT_4 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##3; \
if(Y_MASK##3 != 0) \
BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##3 * STRIDE_Y)); \
else \
BASENAME##3 = 0;
#define LOAD_ROW_INDIRECT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:830
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 839 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_5

#define LOAD_ROW_INDIRECT_5 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##4; \
if(Y_MASK##4 != 0) \
BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##4 * STRIDE_Y)); \
else \
BASENAME##4 = 0;
#define LOAD_ROW_INDIRECT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:839
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 848 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_6

#define LOAD_ROW_INDIRECT_6 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##5; \
if(Y_MASK##5 != 0) \
BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##5 * STRIDE_Y)); \
else \
BASENAME##5 = 0;
#define LOAD_ROW_INDIRECT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:848
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 857 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_7

#define LOAD_ROW_INDIRECT_7 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##6; \
if(Y_MASK##6 != 0) \
BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##6 * STRIDE_Y)); \
else \
BASENAME##6 = 0;
#define LOAD_ROW_INDIRECT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:857
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 866 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_8

#define LOAD_ROW_INDIRECT_8 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##7; \
if(Y_MASK##7 != 0) \
BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##7 * STRIDE_Y)); \
else \
BASENAME##7 = 0;
#define LOAD_ROW_INDIRECT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:866
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 875 of file gemm_helpers.h.

◆ LOAD_ROW_INDIRECT_9

#define LOAD_ROW_INDIRECT_9 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Y,
  Y_MASK 
)
Value:
LOAD_ROW_INDIRECT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK) \
VEC_DATA_TYPE(DATA_TYPE, N0) \
BASENAME##8; \
if(Y_MASK##8 != 0) \
BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + Y##8 * STRIDE_Y)); \
else \
BASENAME##8 = 0;
#define LOAD_ROW_INDIRECT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Y, Y_MASK)
Definition: gemm_helpers.h:875
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 884 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_1

#define LOAD_ROW_PARTIAL_1 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##0, 0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y + Z##0));
#define VLOAD_PARTIAL(size, load_size)
Definition: helpers.h:222

Definition at line 417 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_10

#define LOAD_ROW_PARTIAL_10 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_9(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##9, 0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y + Z##9));
#define LOAD_ROW_PARTIAL_9(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:456

Definition at line 461 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_11

#define LOAD_ROW_PARTIAL_11 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_10(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##A, 0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y + Z##A));
#define LOAD_ROW_PARTIAL_10(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:461

Definition at line 466 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_12

#define LOAD_ROW_PARTIAL_12 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_11(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##B, 0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y + Z##B));
#define LOAD_ROW_PARTIAL_11(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:466

Definition at line 471 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_13

#define LOAD_ROW_PARTIAL_13 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_12(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##C, 0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y + Z##C));
#define LOAD_ROW_PARTIAL_12(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:471

Definition at line 476 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_14

#define LOAD_ROW_PARTIAL_14 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_13(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##D, 0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y + Z##D));
#define LOAD_ROW_PARTIAL_13(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:476

Definition at line 481 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_15

#define LOAD_ROW_PARTIAL_15 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_14(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##E, 0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y + Z##E));
#define LOAD_ROW_PARTIAL_14(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:481

Definition at line 486 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_16

#define LOAD_ROW_PARTIAL_16 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_15(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##F, 0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y + Z##F));
#define LOAD_ROW_PARTIAL_15(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:486

Definition at line 491 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_2

#define LOAD_ROW_PARTIAL_2 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_1(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##1, 0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y + Z##1));
#define LOAD_ROW_PARTIAL_1(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:417

Definition at line 421 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_3

#define LOAD_ROW_PARTIAL_3 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_2(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##2, 0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y + Z##2));
#define LOAD_ROW_PARTIAL_2(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:421

Definition at line 426 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_4

#define LOAD_ROW_PARTIAL_4 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_3(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##3, 0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y + Z##3));
#define LOAD_ROW_PARTIAL_3(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:426

Definition at line 431 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_5

#define LOAD_ROW_PARTIAL_5 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_4(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##4, 0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y + Z##4));
#define LOAD_ROW_PARTIAL_4(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:431

Definition at line 436 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_6

#define LOAD_ROW_PARTIAL_6 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_5(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##5, 0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y + Z##5));
#define LOAD_ROW_PARTIAL_5(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:436

Definition at line 441 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_7

#define LOAD_ROW_PARTIAL_7 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_6(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##6, 0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y + Z##6));
#define LOAD_ROW_PARTIAL_6(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:441

Definition at line 446 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_8

#define LOAD_ROW_PARTIAL_8 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_7(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##7, 0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y + Z##7));
#define LOAD_ROW_PARTIAL_7(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:446

Definition at line 451 of file gemm_helpers.h.

◆ LOAD_ROW_PARTIAL_9

#define LOAD_ROW_PARTIAL_9 (   N0,
  LOAD_N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_ROW_PARTIAL_8(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
VLOAD_PARTIAL(N0, LOAD_N0) \
(BASENAME##8, 0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y + Z##8));
#define LOAD_ROW_PARTIAL_8(N0, LOAD_N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:451

Definition at line 456 of file gemm_helpers.h.

◆ LOAD_SCALAR_AS_VECTOR

#define LOAD_SCALAR_AS_VECTOR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)    LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1088 of file gemm_helpers.h.

◆ LOAD_SCALAR_AS_VECTOR_STR

#define LOAD_SCALAR_AS_VECTOR_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  OFFSET,
  STRIDE_Y 
)    LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

Definition at line 1087 of file gemm_helpers.h.

◆ LOAD_TENSOR

#define LOAD_TENSOR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)    LOAD_TENSOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)

Definition at line 190 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X0

#define LOAD_TENSOR_M0X0 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    ({})

Definition at line 205 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X1

#define LOAD_TENSOR_M0X1 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 208 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X10

#define LOAD_TENSOR_M0X10 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 2, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 239 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X11

#define LOAD_TENSOR_M0X11 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 3, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 243 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X12

#define LOAD_TENSOR_M0X12 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 247 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X13

#define LOAD_TENSOR_M0X13 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin); \
LOAD_TENSOR(M0, 1, DATA_TYPE, a, input_ptr + 12 * sizeof(DATA_TYPE), 12, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 251 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X14

#define LOAD_TENSOR_M0X14 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin); \
LOAD_TENSOR(M0, 2, DATA_TYPE, a, input_ptr + 12 * sizeof(DATA_TYPE), 12, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 256 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X15

#define LOAD_TENSOR_M0X15 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin); \
LOAD_TENSOR(M0, 3, DATA_TYPE, a, input_ptr + 12 * sizeof(DATA_TYPE), 12, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 261 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X16

#define LOAD_TENSOR_M0X16 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 266 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X2

#define LOAD_TENSOR_M0X2 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 211 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X3

#define LOAD_TENSOR_M0X3 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 214 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X4

#define LOAD_TENSOR_M0X4 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 217 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X5

#define LOAD_TENSOR_M0X5 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 1, DATA_TYPE, a, input_ptr + 4 * sizeof(DATA_TYPE), 4, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 220 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X6

#define LOAD_TENSOR_M0X6 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 2, DATA_TYPE, a, input_ptr + 4 * sizeof(DATA_TYPE), 4, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 224 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X7

#define LOAD_TENSOR_M0X7 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 4, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 3, DATA_TYPE, a, input_ptr + 4 * sizeof(DATA_TYPE), 4, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 228 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X8

#define LOAD_TENSOR_M0X8 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)    LOAD_TENSOR(M0, N0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);

Definition at line 232 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0X9

#define LOAD_TENSOR_M0X9 (   M0,
  N0,
  DATA_TYPE,
  a,
  input_ptr,
  src_stride_y,
  zin 
)
Value:
LOAD_TENSOR(M0, 8, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin); \
LOAD_TENSOR(M0, 1, DATA_TYPE, a, input_ptr + 8 * sizeof(DATA_TYPE), 8, src_stride_y, zin);
#define LOAD_TENSOR(M0, N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:190

Definition at line 235 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0XN0

#define LOAD_TENSOR_M0XN0 (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  STRIDE_Y,
  Z 
)    LOAD_TENSOR_M0XN0_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

Definition at line 283 of file gemm_helpers.h.

◆ LOAD_TENSOR_M0XN0_STR

#define LOAD_TENSOR_M0XN0_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  STRIDE_Y,
  Z 
)    LOAD_TENSOR_M0X##N0(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

Definition at line 282 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_0

#define LOAD_TENSOR_ROW_0 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)    ({})

Definition at line 103 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_1

#define LOAD_TENSOR_ROW_1 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)    SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##0) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));

Definition at line 106 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_10

#define LOAD_TENSOR_ROW_10 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_9(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##9) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
#define LOAD_TENSOR_ROW_9(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:137
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 141 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_11

#define LOAD_TENSOR_ROW_11 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_10(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##A) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
#define LOAD_TENSOR_ROW_10(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:141
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 145 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_12

#define LOAD_TENSOR_ROW_12 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_11(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##B) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
#define LOAD_TENSOR_ROW_11(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:145
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 149 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_13

#define LOAD_TENSOR_ROW_13 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_12(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##C) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
#define LOAD_TENSOR_ROW_12(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:149
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 153 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_14

#define LOAD_TENSOR_ROW_14 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_13(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##D) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
#define LOAD_TENSOR_ROW_13(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:153
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 157 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_15

#define LOAD_TENSOR_ROW_15 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_14(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##E) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
#define LOAD_TENSOR_ROW_14(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:157
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 161 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_16

#define LOAD_TENSOR_ROW_16 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_15(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##F) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
#define LOAD_TENSOR_ROW_15(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:161
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 165 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_2

#define LOAD_TENSOR_ROW_2 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_1(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##1) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
#define LOAD_TENSOR_ROW_1(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:106
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 109 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_3

#define LOAD_TENSOR_ROW_3 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_2(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##2) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
#define LOAD_TENSOR_ROW_2(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:109
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 113 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_4

#define LOAD_TENSOR_ROW_4 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_3(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##3) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
#define LOAD_TENSOR_ROW_3(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:113
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 117 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_5

#define LOAD_TENSOR_ROW_5 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_4(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##4) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
#define LOAD_TENSOR_ROW_4(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:117
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 121 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_6

#define LOAD_TENSOR_ROW_6 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_5(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##5) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
#define LOAD_TENSOR_ROW_5(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:121
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 125 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_7

#define LOAD_TENSOR_ROW_7 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_6(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##6) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
#define LOAD_TENSOR_ROW_6(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:125
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 129 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_8

#define LOAD_TENSOR_ROW_8 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_7(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##7) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
#define LOAD_TENSOR_ROW_7(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:129
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 133 of file gemm_helpers.h.

◆ LOAD_TENSOR_ROW_9

#define LOAD_TENSOR_ROW_9 (   N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)
Value:
LOAD_TENSOR_ROW_8(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z) \
SCALAR_ACCESS(COL_OFFSET, N0, BASENAME##8) = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
#define LOAD_TENSOR_ROW_8(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)
Definition: gemm_helpers.h:133
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 137 of file gemm_helpers.h.

◆ LOAD_TENSOR_STR

#define LOAD_TENSOR_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  PTR,
  COL_OFFSET,
  STRIDE_Y,
  Z 
)    LOAD_TENSOR_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, COL_OFFSET, STRIDE_Y, Z)

Definition at line 189 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D

#define LOAD_TEXTURE2D (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)    LOAD_TEXTURE2D_STR(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)

Definition at line 797 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_1

#define LOAD_TEXTURE2D_ROW_1 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)    BASENAME##0 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 0 * X_STEP_ROW), (Y_COORD + 0 * Y_STEP_ROW))

Definition at line 714 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_10

#define LOAD_TEXTURE2D_ROW_10 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_9(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##9 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 9 * X_STEP_ROW), (Y_COORD + 9 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_9(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:745
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 749 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_11

#define LOAD_TEXTURE2D_ROW_11 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_10(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##A = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 10 * X_STEP_ROW), (Y_COORD + 10 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_10(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:749
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 753 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_12

#define LOAD_TEXTURE2D_ROW_12 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_11(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##B = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 11 * X_STEP_ROW), (Y_COORD + 11 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_11(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:753
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 757 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_13

#define LOAD_TEXTURE2D_ROW_13 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_12(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##C = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 12 * X_STEP_ROW), (Y_COORD + 12 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_12(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:757

Definition at line 761 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_14

#define LOAD_TEXTURE2D_ROW_14 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_13(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##D = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 13 * X_STEP_ROW), (Y_COORD + 13 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_13(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:761
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 765 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_15

#define LOAD_TEXTURE2D_ROW_15 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_14(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##E = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 14 * X_STEP_ROW), (Y_COORD + 14 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_14(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:765

Definition at line 769 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_16

#define LOAD_TEXTURE2D_ROW_16 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_15(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##F = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 15 * X_STEP_ROW), (Y_COORD + 15 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_15(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:769

Definition at line 773 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_2

#define LOAD_TEXTURE2D_ROW_2 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_1(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##1 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 1 * X_STEP_ROW), (Y_COORD + 1 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_1(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:714

Definition at line 717 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_3

#define LOAD_TEXTURE2D_ROW_3 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_2(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##2 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 2 * X_STEP_ROW), (Y_COORD + 2 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_2(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:717
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 721 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_4

#define LOAD_TEXTURE2D_ROW_4 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_3(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##3 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 3 * X_STEP_ROW), (Y_COORD + 3 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_3(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:721
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 725 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_5

#define LOAD_TEXTURE2D_ROW_5 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_4(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##4 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 4 * X_STEP_ROW), (Y_COORD + 4 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_4(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:725
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 729 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_6

#define LOAD_TEXTURE2D_ROW_6 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_5(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##5 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 5 * X_STEP_ROW), (Y_COORD + 5 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_5(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:729
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 733 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_7

#define LOAD_TEXTURE2D_ROW_7 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_6(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##6 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 6 * X_STEP_ROW), (Y_COORD + 6 * Y_STEP_ROW))
#define LOAD_TEXTURE2D_ROW_6(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:733
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455

Definition at line 737 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_8

#define LOAD_TEXTURE2D_ROW_8 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_7(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##7 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 7 * X_STEP_ROW), (Y_COORD + 7 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_7(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:737

Definition at line 741 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_ROW_9

#define LOAD_TEXTURE2D_ROW_9 (   N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)
Value:
LOAD_TEXTURE2D_ROW_8(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
BASENAME##8 = READ_IMAGE2D(DATA_TYPE, N0, IMG, (X_COORD + 8 * X_STEP_ROW), (Y_COORD + 8 * Y_STEP_ROW))
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord)
Definition: helpers.h:455
#define LOAD_TEXTURE2D_ROW_8(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
Definition: gemm_helpers.h:741

Definition at line 745 of file gemm_helpers.h.

◆ LOAD_TEXTURE2D_STR

#define LOAD_TEXTURE2D_STR (   M0,
  N0,
  DATA_TYPE,
  BASENAME,
  IMG,
  X_COORD,
  Y_COORD,
  X_STEP_ROW,
  Y_STEP_ROW 
)    LOAD_TEXTURE2D_ROW_##M0(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)

Definition at line 796 of file gemm_helpers.h.

◆ SCALAR_ACCESS

#define SCALAR_ACCESS (   offset,
  n0,
  x 
)    SCALAR_ACCESS_STR(offset, n0, x)

Definition at line 37 of file gemm_helpers.h.

◆ scalar_access_0_1

#define scalar_access_0_1 (   x)    ((x).s0)

Definition at line 40 of file gemm_helpers.h.

◆ scalar_access_0_16

#define scalar_access_0_16 (   x)    ((x).s0123456789ABCDEF)

Definition at line 45 of file gemm_helpers.h.

◆ scalar_access_0_2

#define scalar_access_0_2 (   x)    ((x).s01)

Definition at line 41 of file gemm_helpers.h.

◆ scalar_access_0_3

#define scalar_access_0_3 (   x)    ((x).s012)

Definition at line 42 of file gemm_helpers.h.

◆ scalar_access_0_4

#define scalar_access_0_4 (   x)    ((x).s0123)

Definition at line 43 of file gemm_helpers.h.

◆ scalar_access_0_8

#define scalar_access_0_8 (   x)    ((x).s01234567)

Definition at line 44 of file gemm_helpers.h.

◆ scalar_access_12_1

#define scalar_access_12_1 (   x)    ((x).sC)

Definition at line 83 of file gemm_helpers.h.

◆ scalar_access_12_2

#define scalar_access_12_2 (   x)    ((x).sCD)

Definition at line 84 of file gemm_helpers.h.

◆ scalar_access_12_3

#define scalar_access_12_3 (   x)    ((x).sCDE)

Definition at line 85 of file gemm_helpers.h.

◆ scalar_access_12_4

#define scalar_access_12_4 (   x)    ((x).sCDEF)

Definition at line 86 of file gemm_helpers.h.

◆ scalar_access_16_1

#define scalar_access_16_1 (   x)    ((x).sF)

Definition at line 89 of file gemm_helpers.h.

◆ scalar_access_1_1

#define scalar_access_1_1 (   x)    ((x).s1)

Definition at line 48 of file gemm_helpers.h.

◆ scalar_access_1_2

#define scalar_access_1_2 (   x)    ((x).s12)

Definition at line 49 of file gemm_helpers.h.

◆ scalar_access_1_3

#define scalar_access_1_3 (   x)    ((x).s123)

Definition at line 50 of file gemm_helpers.h.

◆ scalar_access_1_4

#define scalar_access_1_4 (   x)    ((x).s1234)

Definition at line 51 of file gemm_helpers.h.

◆ scalar_access_1_8

#define scalar_access_1_8 (   x)    ((x).s12345678)

Definition at line 52 of file gemm_helpers.h.

◆ scalar_access_2_1

#define scalar_access_2_1 (   x)    ((x).s2)

Definition at line 55 of file gemm_helpers.h.

◆ scalar_access_2_2

#define scalar_access_2_2 (   x)    ((x).s23)

Definition at line 56 of file gemm_helpers.h.

◆ scalar_access_2_3

#define scalar_access_2_3 (   x)    ((x).s234)

Definition at line 57 of file gemm_helpers.h.

◆ scalar_access_2_4

#define scalar_access_2_4 (   x)    ((x).s2345)

Definition at line 58 of file gemm_helpers.h.

◆ scalar_access_2_8

#define scalar_access_2_8 (   x)    ((x).s23456789)

Definition at line 59 of file gemm_helpers.h.

◆ scalar_access_3_1

#define scalar_access_3_1 (   x)    ((x).s3)

Definition at line 62 of file gemm_helpers.h.

◆ scalar_access_3_2

#define scalar_access_3_2 (   x)    ((x).s34)

Definition at line 63 of file gemm_helpers.h.

◆ scalar_access_3_3

#define scalar_access_3_3 (   x)    ((x).s345)

Definition at line 64 of file gemm_helpers.h.

◆ scalar_access_3_4

#define scalar_access_3_4 (   x)    ((x).s3456)

Definition at line 65 of file gemm_helpers.h.

◆ scalar_access_3_8

#define scalar_access_3_8 (   x)    ((x).s3456789A)

Definition at line 66 of file gemm_helpers.h.

◆ scalar_access_4_1

#define scalar_access_4_1 (   x)    ((x).s4)

Definition at line 69 of file gemm_helpers.h.

◆ scalar_access_4_2

#define scalar_access_4_2 (   x)    ((x).s45)

Definition at line 70 of file gemm_helpers.h.

◆ scalar_access_4_3

#define scalar_access_4_3 (   x)    ((x).s456)

Definition at line 71 of file gemm_helpers.h.

◆ scalar_access_4_4

#define scalar_access_4_4 (   x)    ((x).s4567)

Definition at line 72 of file gemm_helpers.h.

◆ scalar_access_4_8

#define scalar_access_4_8 (   x)    ((x).s456789AB)

Definition at line 73 of file gemm_helpers.h.

◆ scalar_access_8_1

#define scalar_access_8_1 (   x)    ((x).s8)

Definition at line 76 of file gemm_helpers.h.

◆ scalar_access_8_2

#define scalar_access_8_2 (   x)    ((x).s89)

Definition at line 77 of file gemm_helpers.h.

◆ scalar_access_8_3

#define scalar_access_8_3 (   x)    ((x).s89A)

Definition at line 78 of file gemm_helpers.h.

◆ scalar_access_8_4

#define scalar_access_8_4 (   x)    ((x).s89AB)

Definition at line 79 of file gemm_helpers.h.

◆ scalar_access_8_8

#define scalar_access_8_8 (   x)    ((x).s89ABCDEF)

Definition at line 80 of file gemm_helpers.h.

◆ SCALAR_ACCESS_STR

#define SCALAR_ACCESS_STR (   offset,
  n0,
  x 
)    scalar_access_##offset##_##n0(x)

Utility macro to access a vector with the scalar positions.

Supported cases: the offset can only be the same size as an OpenCL vector (2, 3, 4, 8, 16).

Parameters
[in] offset - The offset within the vector. The offset can only be the same size as an OpenCL vector (2, 3, 4, 8, 16)
[in] n0 - The number of consecutive columns to access. n0 + offset must be <= 16
[in] x - Vector to access

Definition at line 36 of file gemm_helpers.h.

◆ SCALE_BLOCK

#define SCALE_BLOCK (   N,
  DATA_TYPE,
  BASENAME,
  SCALE 
)    SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)

Definition at line 1274 of file gemm_helpers.h.

◆ SCALE_BLOCK_STR

#define SCALE_BLOCK_STR (   N,
  DATA_TYPE,
  BASENAME,
  SCALE 
)    SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)

Definition at line 1273 of file gemm_helpers.h.

◆ SCALE_ROW_1

#define SCALE_ROW_1 (   DATA_TYPE,
  BASENAME,
  SCALE 
)    BASENAME##0 *= (DATA_TYPE)SCALE;

Definition at line 1198 of file gemm_helpers.h.

◆ SCALE_ROW_10

#define SCALE_ROW_10 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
BASENAME##9 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE)

Definition at line 1233 of file gemm_helpers.h.

◆ SCALE_ROW_11

#define SCALE_ROW_11 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
BASENAME##A *= (DATA_TYPE)SCALE;
#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE)

Definition at line 1237 of file gemm_helpers.h.

◆ SCALE_ROW_12

#define SCALE_ROW_12 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
BASENAME##B *= (DATA_TYPE)SCALE;
#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE)

Definition at line 1241 of file gemm_helpers.h.

◆ SCALE_ROW_13

#define SCALE_ROW_13 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
BASENAME##C *= (DATA_TYPE)SCALE;
#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE)

Definition at line 1245 of file gemm_helpers.h.

◆ SCALE_ROW_14

#define SCALE_ROW_14 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
BASENAME##D *= (DATA_TYPE)SCALE;
#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE)

Definition at line 1249 of file gemm_helpers.h.

◆ SCALE_ROW_15

#define SCALE_ROW_15 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
BASENAME##E *= (DATA_TYPE)SCALE;
#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE)

Definition at line 1253 of file gemm_helpers.h.

◆ SCALE_ROW_16

#define SCALE_ROW_16 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
BASENAME##F *= (DATA_TYPE)SCALE;
#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE)

Definition at line 1257 of file gemm_helpers.h.

◆ SCALE_ROW_2

#define SCALE_ROW_2 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
BASENAME##1 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE)

Definition at line 1201 of file gemm_helpers.h.

◆ SCALE_ROW_3

#define SCALE_ROW_3 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
BASENAME##2 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE)

Definition at line 1205 of file gemm_helpers.h.

◆ SCALE_ROW_4

#define SCALE_ROW_4 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
BASENAME##3 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE)

Definition at line 1209 of file gemm_helpers.h.

◆ SCALE_ROW_5

#define SCALE_ROW_5 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
BASENAME##4 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE)

Definition at line 1213 of file gemm_helpers.h.

◆ SCALE_ROW_6

#define SCALE_ROW_6 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
BASENAME##5 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE)

Definition at line 1217 of file gemm_helpers.h.

◆ SCALE_ROW_7

#define SCALE_ROW_7 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
BASENAME##6 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE)

Definition at line 1221 of file gemm_helpers.h.

◆ SCALE_ROW_8

#define SCALE_ROW_8 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
BASENAME##7 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE)

Definition at line 1225 of file gemm_helpers.h.

◆ SCALE_ROW_9

#define SCALE_ROW_9 (   DATA_TYPE,
  BASENAME,
  SCALE 
)
Value:
SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
BASENAME##8 *= (DATA_TYPE)SCALE;
#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE)

Definition at line 1229 of file gemm_helpers.h.

◆ TRANSPOSE_K0X1

#define TRANSPOSE_K0X1 (   K0,
  BASENAME,
  BS,
  TYPE 
)    COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, BS, TYPE);

Definition at line 1342 of file gemm_helpers.h.

◆ TRANSPOSE_K0X16

#define TRANSPOSE_K0X16 (   K0,
  BASENAME,
  BS,
  TYPE 
)
Value:
TRANSPOSE_K0X8(K0, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 8, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 9, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, A, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, B, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, C, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, D, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, E, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, F, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X8(K0, BASENAME, BS, TYPE)
#define TYPE

Definition at line 1359 of file gemm_helpers.h.

◆ TRANSPOSE_K0X2

#define TRANSPOSE_K0X2 (   K0,
  BASENAME,
  BS,
  TYPE 
)
Value:
COLUMN_VECTOR(K0, 0, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 1, BASENAME, BS, TYPE);
#define TYPE
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, BS, TYPE)
Create column vectors to contain the values at the given index for a set of given vectors...

Definition at line 1344 of file gemm_helpers.h.

◆ TRANSPOSE_K0X3

#define TRANSPOSE_K0X3 (   K0,
  BASENAME,
  BS,
  TYPE 
)
Value:
TRANSPOSE_K0X2(K0, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 2, BASENAME, BS, TYPE);
#define TYPE
#define TRANSPOSE_K0X2(K0, BASENAME, BS, TYPE)

Definition at line 1347 of file gemm_helpers.h.

◆ TRANSPOSE_K0X4

#define TRANSPOSE_K0X4 (   K0,
  BASENAME,
  BS,
  TYPE 
)
Value:
TRANSPOSE_K0X3(K0, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 3, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X3(K0, BASENAME, BS, TYPE)
#define TYPE

Definition at line 1350 of file gemm_helpers.h.

◆ TRANSPOSE_K0X8

#define TRANSPOSE_K0X8 (   K0,
  BASENAME,
  BS,
  TYPE 
)
Value:
TRANSPOSE_K0X4(K0, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 4, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 5, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 6, BASENAME, BS, TYPE); \
COLUMN_VECTOR(K0, 7, BASENAME, BS, TYPE);
#define TYPE
#define TRANSPOSE_K0X4(K0, BASENAME, BS, TYPE)

Definition at line 1353 of file gemm_helpers.h.

◆ TRANSPOSE_K0XN0

#define TRANSPOSE_K0XN0 (   K0,
  N0,
  BASENAME,
  BS,
  TYPE 
)
Value:
CONCAT(TRANSPOSE_K0X, N0) \
(K0, BASENAME, BS, TYPE);
#define TYPE
#define CONCAT(a, b)
Concatenate two inputs.
Definition: helpers.h:56

Create transposed vectors from the given source vectors.

Parameters
[in] K0 - The size of source vectors
[in] N0 - The number of source vectors
[in] BASENAME - The basename of transposed vectors
[in] BS - The basename of source vectors for transposition
[in] TYPE - The data type of the transposed vectors

Definition at line 1405 of file gemm_helpers.h.