Compute Library
 22.11
tile_helpers.h File Reference

Go to the source code of this file.

Macros

#define TILE_VECTOR_SIZE1   1
 
#define TILE_VECTOR_SIZE2   2
 
#define TILE_VECTOR_SIZE3   3
 
#define TILE_VECTOR_SIZE4   4
 
#define TILE_VECTOR_SIZE5   8
 
#define TILE_VECTOR_SIZE6   8
 
#define TILE_VECTOR_SIZE7   8
 
#define TILE_VECTOR_SIZE8   8
 
#define TILE_VECTOR_SIZE9   16
 
#define TILE_VECTOR_SIZE10   16
 
#define TILE_VECTOR_SIZE11   16
 
#define TILE_VECTOR_SIZE12   16
 
#define TILE_VECTOR_SIZE13   16
 
#define TILE_VECTOR_SIZE14   16
 
#define TILE_VECTOR_SIZE15   16
 
#define TILE_VECTOR_SIZE16   16
 
#define TILE_VECTOR_TYPE1(DATA_TYPE)   DATA_TYPE##1
 
#define TILE_VECTOR_TYPE2(DATA_TYPE)   DATA_TYPE##2
 
#define TILE_VECTOR_TYPE3(DATA_TYPE)   DATA_TYPE##3
 
#define TILE_VECTOR_TYPE4(DATA_TYPE)   DATA_TYPE##4
 
#define TILE_VECTOR_TYPE5(DATA_TYPE)   DATA_TYPE##8
 
#define TILE_VECTOR_TYPE6(DATA_TYPE)   DATA_TYPE##8
 
#define TILE_VECTOR_TYPE7(DATA_TYPE)   DATA_TYPE##8
 
#define TILE_VECTOR_TYPE8(DATA_TYPE)   DATA_TYPE##8
 
#define TILE_VECTOR_TYPE9(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE10(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE11(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE12(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE13(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE14(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE15(DATA_TYPE)   DATA_TYPE##16
 
#define TILE_VECTOR_TYPE16(DATA_TYPE)   DATA_TYPE##16
 
#define TILE(DATA_TYPE, H, W, BASENAME)   TILE_STR(DATA_TYPE, H, W, BASENAME)
 Tile object. A tile object is a 2D memory block and can be accessed using the following syntax:

  1. a[m0].v = access the vector at row "m0" (OpenCL vector)
  2. a[m0].s[n0] = access the scalar element at row "m0" and column "n0" (scalar access)
More...
 
#define TILE_STR(DATA_TYPE, H, W, BASENAME)
 
#define TENSOR4D_IMAGE(name)
 
#define TENSOR4D_BUFFER(name)
 
#define TENSOR4D_STR(name, type)   TENSOR4D_##type(name)
 
#define TENSOR4D(name, type)   TENSOR4D_STR(name, type)
 
#define TENSOR4D_T_IMAGE(name)
 
#define TENSOR4D_T_BUFFER(name)
 
#define TENSOR4D_T_STR(name, type)   TENSOR4D_T_##type(name)
 
#define TENSOR4D_T(name, type)   TENSOR4D_T_STR(name, type)
 
#define TENSOR3D_T_IMAGE(name)
 
#define TENSOR3D_T_BUFFER(name)
 
#define TENSOR3D_T_STR(name, type)   TENSOR3D_T_##type(name)
 
#define TENSOR3D_T(name, type)   TENSOR3D_T_STR(name, type)
 
#define UNROLL_INCR(idx, step, macro)   idx += (step); (macro)
 
#define LOOP_UNROLLING_1(idx, step, macro)   (macro)
 
#define LOOP_UNROLLING_2(idx, step, macro)   LOOP_UNROLLING_1(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_3(idx, step, macro)   LOOP_UNROLLING_2(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_4(idx, step, macro)   LOOP_UNROLLING_3(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_5(idx, step, macro)   LOOP_UNROLLING_4(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_6(idx, step, macro)   LOOP_UNROLLING_5(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_7(idx, step, macro)   LOOP_UNROLLING_6(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_8(idx, step, macro)   LOOP_UNROLLING_7(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_9(idx, step, macro)   LOOP_UNROLLING_8(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_10(idx, step, macro)   LOOP_UNROLLING_9(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_11(idx, step, macro)   LOOP_UNROLLING_10(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_12(idx, step, macro)   LOOP_UNROLLING_11(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_13(idx, step, macro)   LOOP_UNROLLING_12(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_14(idx, step, macro)   LOOP_UNROLLING_13(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_15(idx, step, macro)   LOOP_UNROLLING_14(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_16(idx, step, macro)   LOOP_UNROLLING_15(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_17(idx, step, macro)   LOOP_UNROLLING_16(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_18(idx, step, macro)   LOOP_UNROLLING_17(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_19(idx, step, macro)   LOOP_UNROLLING_18(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_20(idx, step, macro)   LOOP_UNROLLING_19(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_21(idx, step, macro)   LOOP_UNROLLING_20(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_22(idx, step, macro)   LOOP_UNROLLING_21(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_23(idx, step, macro)   LOOP_UNROLLING_22(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_24(idx, step, macro)   LOOP_UNROLLING_23(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_25(idx, step, macro)   LOOP_UNROLLING_24(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_26(idx, step, macro)   LOOP_UNROLLING_25(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_27(idx, step, macro)   LOOP_UNROLLING_26(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_28(idx, step, macro)   LOOP_UNROLLING_27(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_29(idx, step, macro)   LOOP_UNROLLING_28(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_30(idx, step, macro)   LOOP_UNROLLING_29(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_31(idx, step, macro)   LOOP_UNROLLING_30(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_32(idx, step, macro)   LOOP_UNROLLING_31(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_33(idx, step, macro)   LOOP_UNROLLING_32(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_34(idx, step, macro)   LOOP_UNROLLING_33(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_35(idx, step, macro)   LOOP_UNROLLING_34(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_36(idx, step, macro)   LOOP_UNROLLING_35(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_37(idx, step, macro)   LOOP_UNROLLING_36(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_38(idx, step, macro)   LOOP_UNROLLING_37(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_39(idx, step, macro)   LOOP_UNROLLING_38(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_40(idx, step, macro)   LOOP_UNROLLING_39(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_41(idx, step, macro)   LOOP_UNROLLING_40(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_42(idx, step, macro)   LOOP_UNROLLING_41(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_43(idx, step, macro)   LOOP_UNROLLING_42(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_44(idx, step, macro)   LOOP_UNROLLING_43(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_45(idx, step, macro)   LOOP_UNROLLING_44(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_46(idx, step, macro)   LOOP_UNROLLING_45(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_47(idx, step, macro)   LOOP_UNROLLING_46(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_48(idx, step, macro)   LOOP_UNROLLING_47(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_49(idx, step, macro)   LOOP_UNROLLING_48(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_50(idx, step, macro)   LOOP_UNROLLING_49(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_51(idx, step, macro)   LOOP_UNROLLING_50(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_52(idx, step, macro)   LOOP_UNROLLING_51(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_53(idx, step, macro)   LOOP_UNROLLING_52(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_54(idx, step, macro)   LOOP_UNROLLING_53(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_55(idx, step, macro)   LOOP_UNROLLING_54(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_56(idx, step, macro)   LOOP_UNROLLING_55(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_57(idx, step, macro)   LOOP_UNROLLING_56(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_58(idx, step, macro)   LOOP_UNROLLING_57(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_59(idx, step, macro)   LOOP_UNROLLING_58(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_60(idx, step, macro)   LOOP_UNROLLING_59(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_61(idx, step, macro)   LOOP_UNROLLING_60(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_62(idx, step, macro)   LOOP_UNROLLING_61(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_63(idx, step, macro)   LOOP_UNROLLING_62(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_64(idx, step, macro)   LOOP_UNROLLING_63(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_65(idx, step, macro)   LOOP_UNROLLING_64(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_66(idx, step, macro)   LOOP_UNROLLING_65(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_67(idx, step, macro)   LOOP_UNROLLING_66(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_68(idx, step, macro)   LOOP_UNROLLING_67(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_69(idx, step, macro)   LOOP_UNROLLING_68(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_70(idx, step, macro)   LOOP_UNROLLING_69(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_71(idx, step, macro)   LOOP_UNROLLING_70(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_72(idx, step, macro)   LOOP_UNROLLING_71(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_73(idx, step, macro)   LOOP_UNROLLING_72(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_74(idx, step, macro)   LOOP_UNROLLING_73(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_75(idx, step, macro)   LOOP_UNROLLING_74(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_76(idx, step, macro)   LOOP_UNROLLING_75(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_77(idx, step, macro)   LOOP_UNROLLING_76(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_78(idx, step, macro)   LOOP_UNROLLING_77(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_79(idx, step, macro)   LOOP_UNROLLING_78(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_80(idx, step, macro)   LOOP_UNROLLING_79(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_81(idx, step, macro)   LOOP_UNROLLING_80(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_82(idx, step, macro)   LOOP_UNROLLING_81(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_83(idx, step, macro)   LOOP_UNROLLING_82(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_84(idx, step, macro)   LOOP_UNROLLING_83(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_85(idx, step, macro)   LOOP_UNROLLING_84(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_86(idx, step, macro)   LOOP_UNROLLING_85(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_87(idx, step, macro)   LOOP_UNROLLING_86(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_88(idx, step, macro)   LOOP_UNROLLING_87(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_89(idx, step, macro)   LOOP_UNROLLING_88(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_90(idx, step, macro)   LOOP_UNROLLING_89(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_91(idx, step, macro)   LOOP_UNROLLING_90(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_92(idx, step, macro)   LOOP_UNROLLING_91(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_93(idx, step, macro)   LOOP_UNROLLING_92(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_94(idx, step, macro)   LOOP_UNROLLING_93(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_95(idx, step, macro)   LOOP_UNROLLING_94(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_96(idx, step, macro)   LOOP_UNROLLING_95(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_97(idx, step, macro)   LOOP_UNROLLING_96(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_98(idx, step, macro)   LOOP_UNROLLING_97(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_99(idx, step, macro)   LOOP_UNROLLING_98(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_100(idx, step, macro)   LOOP_UNROLLING_99(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_101(idx, step, macro)   LOOP_UNROLLING_100(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_102(idx, step, macro)   LOOP_UNROLLING_101(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_103(idx, step, macro)   LOOP_UNROLLING_102(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_104(idx, step, macro)   LOOP_UNROLLING_103(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_105(idx, step, macro)   LOOP_UNROLLING_104(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_106(idx, step, macro)   LOOP_UNROLLING_105(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_107(idx, step, macro)   LOOP_UNROLLING_106(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_108(idx, step, macro)   LOOP_UNROLLING_107(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_109(idx, step, macro)   LOOP_UNROLLING_108(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_110(idx, step, macro)   LOOP_UNROLLING_109(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_111(idx, step, macro)   LOOP_UNROLLING_110(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_112(idx, step, macro)   LOOP_UNROLLING_111(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_113(idx, step, macro)   LOOP_UNROLLING_112(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_114(idx, step, macro)   LOOP_UNROLLING_113(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_115(idx, step, macro)   LOOP_UNROLLING_114(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_116(idx, step, macro)   LOOP_UNROLLING_115(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_117(idx, step, macro)   LOOP_UNROLLING_116(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_118(idx, step, macro)   LOOP_UNROLLING_117(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_119(idx, step, macro)   LOOP_UNROLLING_118(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_120(idx, step, macro)   LOOP_UNROLLING_119(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_121(idx, step, macro)   LOOP_UNROLLING_120(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_122(idx, step, macro)   LOOP_UNROLLING_121(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_123(idx, step, macro)   LOOP_UNROLLING_122(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_124(idx, step, macro)   LOOP_UNROLLING_123(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_125(idx, step, macro)   LOOP_UNROLLING_124(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_126(idx, step, macro)   LOOP_UNROLLING_125(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_127(idx, step, macro)   LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_128(idx, step, macro)   LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)
 
#define LOOP_UNROLLING_STR(type, idx, start, step, num, macro)
 
#define LOOP_UNROLLING(type, idx, start, step, num, macro)   LOOP_UNROLLING_STR(type, idx, start, step, num, macro)
 
#define GET_SPATIAL_IDX(IDX, N0, PARTIAL_N0)   (max((int)(get_global_id(IDX) * N0 - (N0 - PARTIAL_N0) % N0), 0))
 Get the get_global_id with partial N0. More...
 
#define DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)   DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)
 Dot product integer 8-bit function. More...
 
#define DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)   DOT_PRODUCT##K0##_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, x, y, val)
 
#define DOT_PRODUCT5_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT6_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT7_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT9_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT10_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT11_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT12_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT13_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT14_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT15_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define DOT_PRODUCT16_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)
 
#define REDUCE_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)   REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)
 Reduction of an 8-bit integer vector, computed as a dot product with a vector of ones. More...
 
#define REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)   DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, (TILE_VECTOR_TYPE##K0(B_DATA_TYPE))1, c)
 
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)   V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
 Load a vector from global memory (tensor) More...
 
#define V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)   V_LOAD_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)
 
#define V_LOAD_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)
 
#define V_LOAD_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)   READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))
 
#define T_LOAD(DATA_TYPE, HEIGHT, WIDTH, TENSOR_TYPE, TENSOR, X, Y, YI_MULTIPLIER, STRIDE_Y, dst)
 Load a tile from global memory (tensor) More...
 
#define T_LOAD_INDIRECT(DATA_TYPE, HEIGHT, WIDTH, TENSOR_TYPE, TENSOR, X, STRIDE_Y, indirect_y, dst)
 Load a tile from global memory (tensor) using an indirect Y index tile. More...
 
#define T_LOAD_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, dst, indirect_y)
 Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load. More...
 
#define T_LOAD_NHWC(DATA_TYPE, TILE_HEIGHT, TILE_WIDTH, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, dst)
 Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout. More...
 
#define T_LOAD_NHWC_WITH_DILATION(DATA_TYPE, TILE_HEIGHT, TILE_WIDTH, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, DILATION_X, DILATION_Y, BOUNDARY_CHECK, dst)
 Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout with dilation for the X and Y increments. More...
 
#define T_LOAD_NHWC_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, xi, yi, dst)
 Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout using indirect X and Y coordinates. More...
 
#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, yi, dst)
 
#define T_LOAD_NDHWC_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Z, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, TENSOR_DEPTH, STRIDE_Y, xi, yi, zi, dst)
 Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates. More...
 
#define T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, src, indirect_y)
 Store a tile to global memory (tensor) using an indirect Y index tile and conditionally use a different length for the store. More...
 
#define T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, K0, SRC_OFFSET, WEI_OFFSET, lhs, rhs, dst)
 Offset correction for the QASYMM8 computation. More...
 
#define T_QUANTIZE8(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)   T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
 8-bit quantization with fixed-point scale More...
 
#define T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)   T_QUANTIZE8_##QUANTIZATION_TYPE(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
 
#define T_QUANTIZE8_PER_TENSOR(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
 8-bit per-tensor quantization with fixed-point scale More...
 
#define T_QUANTIZE8_PER_CHANNEL(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
 8-bit per-channel quantization with fixed-point scale More...
 
#define T_QUANTIZE8_ASYMMETRIC(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst)
 Quantize the 8-bit tile with fixed-point scale for asymmetric quantization. More...
 
#define T_ROWSET_MASK(DATA_TYPE, M0, N0, VALUE_TO_SET, a, mask)
 Conditional rowset (memset by row) More...
 
#define T_ACTIVATION(DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, src, dst)
 Element-wise activation for floating point types. More...
 
#define relu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   (max((DATA_TYPE)ZERO_VALUE, x))
 
#define brelu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   (min((DATA_TYPE)A_VAL, max((DATA_TYPE)ZERO_VALUE, x)))
 
#define lu_brelu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
 
#define hard_swish_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   (x * ((min(max((DATA_TYPE)(x + (DATA_TYPE)3.f), (DATA_TYPE)0.f), (DATA_TYPE)6.f)) * (DATA_TYPE)0.166666667f))
 
#define identity_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   (x)
 
#define ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
 
#define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)   ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
 
#define V_ADD(A_VAL, B_VAL)   ((A_VAL) + (B_VAL))
 
#define V_DIV(A_VAL, B_VAL)   ((A_VAL) / (B_VAL))
 
#define T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_VALUE, A_VAL, B_VAL, src, dst)
 Element-wise activation for quantized types. More...
 
#define T_ADD(DATA_TYPE, M0, N0, lhs, rhs, dst)
 Element-wise addition between two tiles. More...
 
#define T_ADD_CONSTANT(DATA_TYPE, M0, N0, lhs, rhs_constant, dst)
 Element-wise addition with a constant value. More...
 
#define T_ELTWISE_BROADCAST_ADD_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst)   T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 
#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst)   T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 
#define T_SCALE_CONSTANT(DATA_TYPE, M0, N0, lhs, rhs_constant, dst)
 Element-wise scale with a constant value. More...
 
#define T_ELTWISE_BROADCAST_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 Element-wise operation with RHS broadcasted (RHS has the X dimension only) More...
 
#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst)   T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 
#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst)   T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 
#define T_ELTWISE(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
 Element-wise operation between two tiles (LHS and RHS) More...
 
#define T_FLOOR(DST_DATA_TYPE, M0, N0, src, dst)
 Floor operation on a tile. More...
 
#define T_MMUL(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, LHS_LAYOUT, RHS_LAYOUT, lhs, rhs, dst)   T_MMUL_##LHS_LAYOUT##_##RHS_LAYOUT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 Matrix multiplication. More...
 
#define T_MMUL_NT_T(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_float_float_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_half_half_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_half_half_half(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_char_char_int(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_uchar_uchar_uint(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_uchar_uchar_int(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)   T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 
#define T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
 

Macro Definition Documentation

◆ ACT_OP_QUANTIZED

#define ACT_OP_QUANTIZED (   op,
  DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x
)    op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)

Definition at line 981 of file tile_helpers.h.

◆ ACTIVATION_QUANTIZED

#define ACTIVATION_QUANTIZED (   op,
  DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x
)    ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)

Definition at line 982 of file tile_helpers.h.

◆ brelu_op_quantized

#define brelu_op_quantized (   DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x
)    (min((DATA_TYPE)A_VAL, max((DATA_TYPE)ZERO_VALUE, x)))

Definition at line 973 of file tile_helpers.h.

◆ DOT_PRODUCT10_INTEGER8

#define DOT_PRODUCT10_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89), ((b).s89), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 392 of file tile_helpers.h.

◆ DOT_PRODUCT11_INTEGER8

#define DOT_PRODUCT11_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89A), ((b).s89A), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 397 of file tile_helpers.h.

◆ DOT_PRODUCT12_INTEGER8

#define DOT_PRODUCT12_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89AB), ((b).s89AB), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 402 of file tile_helpers.h.

◆ DOT_PRODUCT13_INTEGER8

#define DOT_PRODUCT13_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT5_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89ABC), ((b).s89ABC), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 407 of file tile_helpers.h.

◆ DOT_PRODUCT14_INTEGER8

#define DOT_PRODUCT14_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT6_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89ABCD), ((b).s89ABCD), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 412 of file tile_helpers.h.

◆ DOT_PRODUCT15_INTEGER8

#define DOT_PRODUCT15_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT7_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89ABCDE), ((b).s89ABCDE), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 417 of file tile_helpers.h.

◆ DOT_PRODUCT16_INTEGER8

#define DOT_PRODUCT16_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).lo), ((b).lo), c); \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).hi), ((b).hi), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 422 of file tile_helpers.h.

◆ DOT_PRODUCT1_INTEGER8

#define DOT_PRODUCT1_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
c += (C_DATA_TYPE)(a) * (C_DATA_TYPE)(b); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 332 of file tile_helpers.h.

◆ DOT_PRODUCT2_INTEGER8

#define DOT_PRODUCT2_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
 
)
Value:
({ \
c += (C_DATA_TYPE)(a).s0 * (C_DATA_TYPE)(b).s0; \
c += (C_DATA_TYPE)(a).s1 * (C_DATA_TYPE)(b).s1; \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 349 of file tile_helpers.h.

◆ DOT_PRODUCT3_INTEGER8

#define DOT_PRODUCT3_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c); \
c += (C_DATA_TYPE)(a).s2 * (C_DATA_TYPE)(b).s2; \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 354 of file tile_helpers.h.

◆ DOT_PRODUCT4_INTEGER8

#define DOT_PRODUCT4_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  x,
  y,
  val 
)
Value:
({ \
val += (C_DATA_TYPE)(x).s0 * (C_DATA_TYPE)(y).s0; \
val += (C_DATA_TYPE)(x).s1 * (C_DATA_TYPE)(y).s1; \
val += (C_DATA_TYPE)(x).s2 * (C_DATA_TYPE)(y).s2; \
val += (C_DATA_TYPE)(x).s3 * (C_DATA_TYPE)(y).s3; \
})

Definition at line 359 of file tile_helpers.h.

◆ DOT_PRODUCT5_INTEGER8

#define DOT_PRODUCT5_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c); \
DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s4), ((b).s4), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 367 of file tile_helpers.h.

◆ DOT_PRODUCT6_INTEGER8

#define DOT_PRODUCT6_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c); \
DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s45), ((b).s45), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 372 of file tile_helpers.h.

◆ DOT_PRODUCT7_INTEGER8

#define DOT_PRODUCT7_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c); \
DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s456), ((b).s456), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 377 of file tile_helpers.h.

◆ DOT_PRODUCT8_INTEGER8

#define DOT_PRODUCT8_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).lo), ((b).lo), c); \
DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).hi), ((b).hi), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 382 of file tile_helpers.h.

◆ DOT_PRODUCT9_INTEGER8

#define DOT_PRODUCT9_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  a,
  b,
  c 
)
Value:
({ \
DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c); \
DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s8), ((b).s8), c); \
})
SimpleTensor< float > b
Definition: DFT.cpp:157

Definition at line 387 of file tile_helpers.h.

◆ DOT_PRODUCT_INTEGER8

#define DOT_PRODUCT_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  K0,
  a,
  b,
  c 
)    DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)

Dot product function for 8-bit integer vectors.

Note
Performs: c += dot(a, b)
Parameters
[in] A_DATA_TYPE  A (lhs) data type
[in] B_DATA_TYPE  B (rhs) data type
[in] C_DATA_TYPE  C (accumulator) data type
[in] K0  Number of accumulations
[in] a  OpenCL vector a
[in] b  OpenCL vector b
[in,out] c  Scalar variable c (accumulator)

Definition at line 330 of file tile_helpers.h.

◆ DOT_PRODUCT_INTEGER8_STR

#define DOT_PRODUCT_INTEGER8_STR (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  K0,
  a,
  b,
  c 
)    DOT_PRODUCT##K0##_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

Definition at line 331 of file tile_helpers.h.

◆ GET_SPATIAL_IDX

#define GET_SPATIAL_IDX (   IDX,
  N0,
  PARTIAL_N0 
)    (max((int)(get_global_id(IDX) * N0 - (N0 - PARTIAL_N0) % N0), 0))

Get the get_global_id with partial N0.

This function is useful when the dimension is not multiple of N0 and we need to use a partial N0 to avoid out-of-bound read/write

Note
PARTIAL_N0 is used for get_global_id(n) = 0.
Parameters
[in] IDX  get_global_id index (0, 1 and 2 only)
[in] N0  Number of elements read/written on the IDX direction
[in] PARTIAL_N0  Number of elements read/written on the IDX direction for get_global_id(IDX) = 0. If zero, N0 elements are read/written for get_global_id(IDX) = 0 as well

Definition at line 316 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ hard_swish_op_quantized

#define hard_swish_op_quantized (   DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x 
)    (x * ((min(max((DATA_TYPE)(x + (DATA_TYPE)3.f), (DATA_TYPE)0.f), (DATA_TYPE)6.f)) * (DATA_TYPE)0.166666667f))

Definition at line 977 of file tile_helpers.h.

◆ identity_op_quantized

#define identity_op_quantized (   DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x 
)    (x)

Definition at line 979 of file tile_helpers.h.

◆ LOOP_UNROLLING

#define LOOP_UNROLLING (   type,
  idx,
  start,
  step,
  num,
  macro 
)    LOOP_UNROLLING_STR(type, idx, start, step, num, macro)

◆ LOOP_UNROLLING_1

#define LOOP_UNROLLING_1 (   idx,
  step,
  macro 
)    (macro)

Definition at line 160 of file tile_helpers.h.

◆ LOOP_UNROLLING_10

#define LOOP_UNROLLING_10 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_9(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 169 of file tile_helpers.h.

◆ LOOP_UNROLLING_100

#define LOOP_UNROLLING_100 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_99(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 259 of file tile_helpers.h.

◆ LOOP_UNROLLING_101

#define LOOP_UNROLLING_101 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_100(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 260 of file tile_helpers.h.

◆ LOOP_UNROLLING_102

#define LOOP_UNROLLING_102 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_101(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 261 of file tile_helpers.h.

◆ LOOP_UNROLLING_103

#define LOOP_UNROLLING_103 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_102(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 262 of file tile_helpers.h.

◆ LOOP_UNROLLING_104

#define LOOP_UNROLLING_104 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_103(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 263 of file tile_helpers.h.

◆ LOOP_UNROLLING_105

#define LOOP_UNROLLING_105 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_104(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 264 of file tile_helpers.h.

◆ LOOP_UNROLLING_106

#define LOOP_UNROLLING_106 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_105(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 265 of file tile_helpers.h.

◆ LOOP_UNROLLING_107

#define LOOP_UNROLLING_107 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_106(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 266 of file tile_helpers.h.

◆ LOOP_UNROLLING_108

#define LOOP_UNROLLING_108 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_107(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 267 of file tile_helpers.h.

◆ LOOP_UNROLLING_109

#define LOOP_UNROLLING_109 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_108(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 268 of file tile_helpers.h.

◆ LOOP_UNROLLING_11

#define LOOP_UNROLLING_11 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_10(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 170 of file tile_helpers.h.

◆ LOOP_UNROLLING_110

#define LOOP_UNROLLING_110 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_109(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 269 of file tile_helpers.h.

◆ LOOP_UNROLLING_111

#define LOOP_UNROLLING_111 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_110(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 270 of file tile_helpers.h.

◆ LOOP_UNROLLING_112

#define LOOP_UNROLLING_112 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_111(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 271 of file tile_helpers.h.

◆ LOOP_UNROLLING_113

#define LOOP_UNROLLING_113 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_112(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 272 of file tile_helpers.h.

◆ LOOP_UNROLLING_114

#define LOOP_UNROLLING_114 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_113(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 273 of file tile_helpers.h.

◆ LOOP_UNROLLING_115

#define LOOP_UNROLLING_115 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_114(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 274 of file tile_helpers.h.

◆ LOOP_UNROLLING_116

#define LOOP_UNROLLING_116 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_115(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 275 of file tile_helpers.h.

◆ LOOP_UNROLLING_117

#define LOOP_UNROLLING_117 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_116(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 276 of file tile_helpers.h.

◆ LOOP_UNROLLING_118

#define LOOP_UNROLLING_118 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_117(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 277 of file tile_helpers.h.

◆ LOOP_UNROLLING_119

#define LOOP_UNROLLING_119 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_118(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 278 of file tile_helpers.h.

◆ LOOP_UNROLLING_12

#define LOOP_UNROLLING_12 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_11(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 171 of file tile_helpers.h.

◆ LOOP_UNROLLING_120

#define LOOP_UNROLLING_120 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_119(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 279 of file tile_helpers.h.

◆ LOOP_UNROLLING_121

#define LOOP_UNROLLING_121 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_120(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 280 of file tile_helpers.h.

◆ LOOP_UNROLLING_122

#define LOOP_UNROLLING_122 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_121(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 281 of file tile_helpers.h.

◆ LOOP_UNROLLING_123

#define LOOP_UNROLLING_123 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_122(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 282 of file tile_helpers.h.

◆ LOOP_UNROLLING_124

#define LOOP_UNROLLING_124 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_123(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 283 of file tile_helpers.h.

◆ LOOP_UNROLLING_125

#define LOOP_UNROLLING_125 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_124(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 284 of file tile_helpers.h.

◆ LOOP_UNROLLING_126

#define LOOP_UNROLLING_126 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_125(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 285 of file tile_helpers.h.

◆ LOOP_UNROLLING_127

#define LOOP_UNROLLING_127 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 286 of file tile_helpers.h.

◆ LOOP_UNROLLING_128

#define LOOP_UNROLLING_128 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 287 of file tile_helpers.h.

◆ LOOP_UNROLLING_13

#define LOOP_UNROLLING_13 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_12(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 172 of file tile_helpers.h.

◆ LOOP_UNROLLING_14

#define LOOP_UNROLLING_14 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_13(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 173 of file tile_helpers.h.

◆ LOOP_UNROLLING_15

#define LOOP_UNROLLING_15 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_14(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 174 of file tile_helpers.h.

◆ LOOP_UNROLLING_16

#define LOOP_UNROLLING_16 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_15(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 175 of file tile_helpers.h.

◆ LOOP_UNROLLING_17

#define LOOP_UNROLLING_17 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_16(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 176 of file tile_helpers.h.

◆ LOOP_UNROLLING_18

#define LOOP_UNROLLING_18 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_17(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 177 of file tile_helpers.h.

◆ LOOP_UNROLLING_19

#define LOOP_UNROLLING_19 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_18(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 178 of file tile_helpers.h.

◆ LOOP_UNROLLING_2

#define LOOP_UNROLLING_2 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_1(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 161 of file tile_helpers.h.

◆ LOOP_UNROLLING_20

#define LOOP_UNROLLING_20 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_19(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 179 of file tile_helpers.h.

◆ LOOP_UNROLLING_21

#define LOOP_UNROLLING_21 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_20(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 180 of file tile_helpers.h.

◆ LOOP_UNROLLING_22

#define LOOP_UNROLLING_22 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_21(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 181 of file tile_helpers.h.

◆ LOOP_UNROLLING_23

#define LOOP_UNROLLING_23 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_22(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 182 of file tile_helpers.h.

◆ LOOP_UNROLLING_24

#define LOOP_UNROLLING_24 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_23(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 183 of file tile_helpers.h.

◆ LOOP_UNROLLING_25

#define LOOP_UNROLLING_25 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_24(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 184 of file tile_helpers.h.

◆ LOOP_UNROLLING_26

#define LOOP_UNROLLING_26 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_25(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 185 of file tile_helpers.h.

◆ LOOP_UNROLLING_27

#define LOOP_UNROLLING_27 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_26(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 186 of file tile_helpers.h.

◆ LOOP_UNROLLING_28

#define LOOP_UNROLLING_28 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_27(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 187 of file tile_helpers.h.

◆ LOOP_UNROLLING_29

#define LOOP_UNROLLING_29 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_28(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 188 of file tile_helpers.h.

◆ LOOP_UNROLLING_3

#define LOOP_UNROLLING_3 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_2(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 162 of file tile_helpers.h.

◆ LOOP_UNROLLING_30

#define LOOP_UNROLLING_30 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_29(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 189 of file tile_helpers.h.

◆ LOOP_UNROLLING_31

#define LOOP_UNROLLING_31 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_30(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 190 of file tile_helpers.h.

◆ LOOP_UNROLLING_32

#define LOOP_UNROLLING_32 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_31(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 191 of file tile_helpers.h.

◆ LOOP_UNROLLING_33

#define LOOP_UNROLLING_33 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_32(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 192 of file tile_helpers.h.

◆ LOOP_UNROLLING_34

#define LOOP_UNROLLING_34 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_33(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 193 of file tile_helpers.h.

◆ LOOP_UNROLLING_35

#define LOOP_UNROLLING_35 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_34(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 194 of file tile_helpers.h.

◆ LOOP_UNROLLING_36

#define LOOP_UNROLLING_36 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_35(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 195 of file tile_helpers.h.

◆ LOOP_UNROLLING_37

#define LOOP_UNROLLING_37 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_36(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 196 of file tile_helpers.h.

◆ LOOP_UNROLLING_38

#define LOOP_UNROLLING_38 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_37(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 197 of file tile_helpers.h.

◆ LOOP_UNROLLING_39

#define LOOP_UNROLLING_39 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_38(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 198 of file tile_helpers.h.

◆ LOOP_UNROLLING_4

#define LOOP_UNROLLING_4 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_3(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 163 of file tile_helpers.h.

◆ LOOP_UNROLLING_40

#define LOOP_UNROLLING_40 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_39(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 199 of file tile_helpers.h.

◆ LOOP_UNROLLING_41

#define LOOP_UNROLLING_41 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_40(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 200 of file tile_helpers.h.

◆ LOOP_UNROLLING_42

#define LOOP_UNROLLING_42 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_41(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 201 of file tile_helpers.h.

◆ LOOP_UNROLLING_43

#define LOOP_UNROLLING_43 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_42(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 202 of file tile_helpers.h.

◆ LOOP_UNROLLING_44

#define LOOP_UNROLLING_44 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_43(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 203 of file tile_helpers.h.

◆ LOOP_UNROLLING_45

#define LOOP_UNROLLING_45 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_44(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 204 of file tile_helpers.h.

◆ LOOP_UNROLLING_46

#define LOOP_UNROLLING_46 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_45(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 205 of file tile_helpers.h.

◆ LOOP_UNROLLING_47

#define LOOP_UNROLLING_47 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_46(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 206 of file tile_helpers.h.

◆ LOOP_UNROLLING_48

#define LOOP_UNROLLING_48 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_47(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 207 of file tile_helpers.h.

◆ LOOP_UNROLLING_49

#define LOOP_UNROLLING_49 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_48(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 208 of file tile_helpers.h.

◆ LOOP_UNROLLING_5

#define LOOP_UNROLLING_5 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_4(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 164 of file tile_helpers.h.

◆ LOOP_UNROLLING_50

#define LOOP_UNROLLING_50 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_49(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 209 of file tile_helpers.h.

◆ LOOP_UNROLLING_51

#define LOOP_UNROLLING_51 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_50(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 210 of file tile_helpers.h.

◆ LOOP_UNROLLING_52

#define LOOP_UNROLLING_52 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_51(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 211 of file tile_helpers.h.

◆ LOOP_UNROLLING_53

#define LOOP_UNROLLING_53 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_52(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 212 of file tile_helpers.h.

◆ LOOP_UNROLLING_54

#define LOOP_UNROLLING_54 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_53(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 213 of file tile_helpers.h.

◆ LOOP_UNROLLING_55

#define LOOP_UNROLLING_55 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_54(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 214 of file tile_helpers.h.

◆ LOOP_UNROLLING_56

#define LOOP_UNROLLING_56 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_55(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 215 of file tile_helpers.h.

◆ LOOP_UNROLLING_57

#define LOOP_UNROLLING_57 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_56(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 216 of file tile_helpers.h.

◆ LOOP_UNROLLING_58

#define LOOP_UNROLLING_58 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_57(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 217 of file tile_helpers.h.

◆ LOOP_UNROLLING_59

#define LOOP_UNROLLING_59 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_58(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 218 of file tile_helpers.h.

◆ LOOP_UNROLLING_6

#define LOOP_UNROLLING_6 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_5(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 165 of file tile_helpers.h.

◆ LOOP_UNROLLING_60

#define LOOP_UNROLLING_60 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_59(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 219 of file tile_helpers.h.

◆ LOOP_UNROLLING_61

#define LOOP_UNROLLING_61 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_60(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 220 of file tile_helpers.h.

◆ LOOP_UNROLLING_62

#define LOOP_UNROLLING_62 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_61(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 221 of file tile_helpers.h.

◆ LOOP_UNROLLING_63

#define LOOP_UNROLLING_63 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_62(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 222 of file tile_helpers.h.

◆ LOOP_UNROLLING_64

#define LOOP_UNROLLING_64 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_63(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 223 of file tile_helpers.h.

◆ LOOP_UNROLLING_65

#define LOOP_UNROLLING_65 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_64(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 224 of file tile_helpers.h.

◆ LOOP_UNROLLING_66

#define LOOP_UNROLLING_66 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_65(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 225 of file tile_helpers.h.

◆ LOOP_UNROLLING_67

#define LOOP_UNROLLING_67 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_66(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 226 of file tile_helpers.h.

◆ LOOP_UNROLLING_68

#define LOOP_UNROLLING_68 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_67(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 227 of file tile_helpers.h.

◆ LOOP_UNROLLING_69

#define LOOP_UNROLLING_69 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_68(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 228 of file tile_helpers.h.

◆ LOOP_UNROLLING_7

#define LOOP_UNROLLING_7 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_6(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 166 of file tile_helpers.h.

◆ LOOP_UNROLLING_70

#define LOOP_UNROLLING_70 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_69(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 229 of file tile_helpers.h.

◆ LOOP_UNROLLING_71

#define LOOP_UNROLLING_71 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_70(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 230 of file tile_helpers.h.

◆ LOOP_UNROLLING_72

#define LOOP_UNROLLING_72 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_71(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 231 of file tile_helpers.h.

◆ LOOP_UNROLLING_73

#define LOOP_UNROLLING_73 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_72(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 232 of file tile_helpers.h.

◆ LOOP_UNROLLING_74

#define LOOP_UNROLLING_74 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_73(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 233 of file tile_helpers.h.

◆ LOOP_UNROLLING_75

#define LOOP_UNROLLING_75 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_74(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 234 of file tile_helpers.h.

◆ LOOP_UNROLLING_76

#define LOOP_UNROLLING_76 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_75(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 235 of file tile_helpers.h.

◆ LOOP_UNROLLING_77

#define LOOP_UNROLLING_77 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_76(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 236 of file tile_helpers.h.

◆ LOOP_UNROLLING_78

#define LOOP_UNROLLING_78 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_77(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 237 of file tile_helpers.h.

◆ LOOP_UNROLLING_79

#define LOOP_UNROLLING_79 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_78(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 238 of file tile_helpers.h.

◆ LOOP_UNROLLING_8

#define LOOP_UNROLLING_8 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_7(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 167 of file tile_helpers.h.

◆ LOOP_UNROLLING_80

#define LOOP_UNROLLING_80 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_79(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 239 of file tile_helpers.h.

◆ LOOP_UNROLLING_81

#define LOOP_UNROLLING_81 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_80(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 240 of file tile_helpers.h.

◆ LOOP_UNROLLING_82

#define LOOP_UNROLLING_82 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_81(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 241 of file tile_helpers.h.

◆ LOOP_UNROLLING_83

#define LOOP_UNROLLING_83 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_82(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 242 of file tile_helpers.h.

◆ LOOP_UNROLLING_84

#define LOOP_UNROLLING_84 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_83(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 243 of file tile_helpers.h.

◆ LOOP_UNROLLING_85

#define LOOP_UNROLLING_85 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_84(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 244 of file tile_helpers.h.

◆ LOOP_UNROLLING_86

#define LOOP_UNROLLING_86 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_85(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 245 of file tile_helpers.h.

◆ LOOP_UNROLLING_87

#define LOOP_UNROLLING_87 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_86(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 246 of file tile_helpers.h.

◆ LOOP_UNROLLING_88

#define LOOP_UNROLLING_88 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_87(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 247 of file tile_helpers.h.

◆ LOOP_UNROLLING_89

#define LOOP_UNROLLING_89 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_88(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 248 of file tile_helpers.h.

◆ LOOP_UNROLLING_9

#define LOOP_UNROLLING_9 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_8(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 168 of file tile_helpers.h.

◆ LOOP_UNROLLING_90

#define LOOP_UNROLLING_90 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_89(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 249 of file tile_helpers.h.

◆ LOOP_UNROLLING_91

#define LOOP_UNROLLING_91 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_90(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 250 of file tile_helpers.h.

◆ LOOP_UNROLLING_92

#define LOOP_UNROLLING_92 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_91(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 251 of file tile_helpers.h.

◆ LOOP_UNROLLING_93

#define LOOP_UNROLLING_93 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_92(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 252 of file tile_helpers.h.

◆ LOOP_UNROLLING_94

#define LOOP_UNROLLING_94 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_93(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 253 of file tile_helpers.h.

◆ LOOP_UNROLLING_95

#define LOOP_UNROLLING_95 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_94(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 254 of file tile_helpers.h.

◆ LOOP_UNROLLING_96

#define LOOP_UNROLLING_96 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_95(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 255 of file tile_helpers.h.

◆ LOOP_UNROLLING_97

#define LOOP_UNROLLING_97 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_96(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 256 of file tile_helpers.h.

◆ LOOP_UNROLLING_98

#define LOOP_UNROLLING_98 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_97(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 257 of file tile_helpers.h.

◆ LOOP_UNROLLING_99

#define LOOP_UNROLLING_99 (   idx,
  step,
  macro 
)    LOOP_UNROLLING_98(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 258 of file tile_helpers.h.

◆ LOOP_UNROLLING_STR

#define LOOP_UNROLLING_STR (   type,
  idx,
  start,
  step,
  num,
  macro 
)
Value:
{ \
type idx = start; \
LOOP_UNROLLING_##num(idx, step, macro); \
}
constexpr int step
Definition: fp32.cpp:35

Definition at line 289 of file tile_helpers.h.

◆ lu_brelu_op_quantized

#define lu_brelu_op_quantized (   DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x 
)    (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))

Definition at line 975 of file tile_helpers.h.

◆ REDUCE_INTEGER8

#define REDUCE_INTEGER8 (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  K0,
  a,
  c 
)    REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)

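Reduction (sum of elements) function for 8-bit integer vectors.

Note
Performs: c += sum of the elements of a (computed as the dot product of a with an all-ones vector)
Parameters
[in] A_DATA_TYPE  Data type of vector a
[in] B_DATA_TYPE  Data type of the all-ones vector that a is multiplied with
[in] C_DATA_TYPE  C (accumulator) data type
[in] K0  Number of accumulations
[in] a  OpenCL vector a
[in,out] c  Scalar variable c (accumulator)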

Definition at line 439 of file tile_helpers.h.

◆ REDUCE_INTEGER8_STR

#define REDUCE_INTEGER8_STR (   A_DATA_TYPE,
  B_DATA_TYPE,
  C_DATA_TYPE,
  K0,
  a,
  c 
)    DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, (TILE_VECTOR_TYPE##K0(B_DATA_TYPE))1, c)

Definition at line 440 of file tile_helpers.h.

◆ relu_op_quantized

#define relu_op_quantized (   DATA_TYPE,
  VEC_SIZE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  x 
)    (max((DATA_TYPE)ZERO_VALUE, x))

Definition at line 971 of file tile_helpers.h.

◆ T_ACTIVATION

#define T_ACTIVATION (   DATA_TYPE,
  M0,
  N0,
  ACTIVATION_TYPE,
  A_VAL,
  B_VAL,
  src,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, N0, src[_m0].v, A_VAL, B_VAL); \
}) \
})
SimpleTensor< float > src
Definition: DFT.cpp:155
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)

Element-wise activation for floating point types.

Note
Performs: activation(SRC) = DST
Parameters
[in]DATA_TYPESRC/DST data type
[in]M0Number of SRC/DST rows
[in]N0Number of SRC/DST columns
[in]ACTIVATION_TYPEActivation type
[in]A_VALA value used for the activation (e.g. tanh_op, brelu,..)
[in]B_VALB value used for the activation (e.g. tanh_op, brelu,..)
[in]srcSRC tile
[out]dstDST tile

Definition at line 962 of file tile_helpers.h.

Referenced by direct_convolution_nhwc().

◆ T_ACTIVATION_QUANTIZED

#define T_ACTIVATION_QUANTIZED (   DATA_TYPE,
  M0,
  N0,
  ACTIVATION_TYPE,
  ZERO_VALUE,
  A_VAL,
  B_VAL,
  src,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = ACTIVATION_QUANTIZED(ACTIVATION_TYPE, DATA_TYPE, N0, ZERO_VALUE, A_VAL, B_VAL, src[_m0].v); \
}) \
})
SimpleTensor< float > src
Definition: DFT.cpp:155
#define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
Definition: tile_helpers.h:982

Element-wise activation for quantized types.

Note
Performs: activation(SRC) = DST
Parameters
[in]DATA_TYPESRC/DST data type
[in]M0Number of SRC/DST rows
[in]N0Number of SRC/DST columns
[in]ACTIVATION_TYPEActivation type
[in]ZERO_VALUEThe zero value to consider in the computation
[in]A_VALA value used for the activation (e.g. tanh_op, brelu,..)
[in]B_VALB value used for the activation (e.g. tanh_op, brelu,..)
[in]srcSRC tile
[out]dstDST tile

Definition at line 1001 of file tile_helpers.h.

◆ T_ADD

#define T_ADD (   DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = lhs[_m0].v + rhs[_m0].v; \
}) \
})

Element-wise addition between two tiles.

Note
Performs: LHS + RHS = DST
Parameters
[in]DATA_TYPELHS/RHS/DST data type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]lhsLHS tile
[in]rhsRHS tile
[out]dstDST tile

Definition at line 1020 of file tile_helpers.h.

◆ T_ADD_CONSTANT

#define T_ADD_CONSTANT (   DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs_constant,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = lhs[_m0].v + (DATA_TYPE)rhs_constant; \
}) \
})

Element-wise addition with a constant value.

Note
Performs: LHS + constant = DST
Parameters
[in]DATA_TYPELHS/RHS/DST data type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]lhsLHS tile
[in]rhs_constantConstant value
[out]dstDST tile

Definition at line 1039 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ T_ELTWISE

#define T_ELTWISE (   T_ELWISE_OP,
  DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
#define CONVERT(x, type)
Definition: helpers.h:731
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Element-wise operation between two tiles (LHS and RHS)

Note
Performs: LHS OP RHS = DST
Both tiles must have same data type
Parameters
[in]T_ELWISE_OPElementwise operator to perform
[in]DST_DATA_TYPEDST data type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]lhsLHS tile
[in]rhsRHS tile
[out]dstDST tile

Definition at line 1106 of file tile_helpers.h.

◆ T_ELTWISE_ADD

#define T_ELTWISE_ADD (   DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)    T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1090 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_ADD_X

#define T_ELTWISE_BROADCAST_ADD_X (   DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)    T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1047 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ T_ELTWISE_BROADCAST_DIV_X

#define T_ELTWISE_BROADCAST_DIV_X (   DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)    T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1048 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_X

#define T_ELTWISE_BROADCAST_X (   T_ELWISE_OP,
  DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
#define CONVERT(x, type)
Definition: helpers.h:731
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Element-wise operation with RHS broadcasted (RHS has the X dimension only)

Note
Performs: LHS OP RHS[broadcasted] = DST
Both tiles must have same data type
Parameters
[in]T_ELWISE_OPElementwise operator to perform
[in]DST_DATA_TYPEDST data type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]lhsLHS tile
[in]rhsRHS tile
[out]dstDST tile

Definition at line 1082 of file tile_helpers.h.

◆ T_ELTWISE_DIV

#define T_ELTWISE_DIV (   DST_DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs,
  dst 
)    T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1091 of file tile_helpers.h.

◆ T_FLOOR

#define T_FLOOR (   DST_DATA_TYPE,
  M0,
  N0,
  src,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = floor(CONVERT(src[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
#define CONVERT(x, type)
Definition: helpers.h:731
SimpleTensor< float > src
Definition: DFT.cpp:155
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Floor operation on a tile.

Note
Performs: floor(SRC) = DST
Both tiles must have same data type
Parameters
[in]DST_DATA_TYPEDST data type
[in]M0Number of SRC rows
[in]N0Number of SRC columns
[in]srcSRC tile
[out]dstDST tile

Definition at line 1125 of file tile_helpers.h.

◆ T_LOAD

#define T_LOAD (   DATA_TYPE,
  HEIGHT,
  WIDTH,
  TENSOR_TYPE,
  TENSOR,
  X,
  Y,
  YI_MULTIPLIER,
  STRIDE_Y,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
dst[_i].v = V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, ((Y) + _i * (int)(YI_MULTIPLIER)), STRIDE_Y); \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor)

Parameters
[in]DATA_TYPEData type
[in]HEIGHTNumber of dst rows
[in]WIDTHNumber of dst columns
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]XStarting X position
[in]YStarting Y position
[in]YI_MULTIPLIERParameter used to multiply the internal row increment (_i). In common cases it should be 1, but it becomes useful when we want to load rows which are multiple of STRIDE_Y (e.g. loading the weights of a convolution layer). In this case the address calculation is performed as: (Y + _i * YI_MULTIPLIER) * STRIDE_Y
[in]STRIDE_YStride Y (in bytes) used to load each row.
[out]dstOutput tile

Definition at line 476 of file tile_helpers.h.

Referenced by direct_convolution_nhwc().

◆ T_LOAD2D_INDIRECT

#define T_LOAD2D_INDIRECT (   DATA_TYPE,
  TILE_AREA,
  TILE_CHANNELS,
  TENSOR_TYPE,
  TENSOR,
  B,
  Y,
  X,
  C,
  TENSOR_WIDTH,
  TENSOR_HEIGHT,
  STRIDE_Y,
  yi,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
{ \
if(yi[_i].v >= 0) \
{ \
dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[_i].v, STRIDE_Y); \
} \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453

Definition at line 656 of file tile_helpers.h.

Referenced by direct_convolution_nhwc().

◆ T_LOAD_INDIRECT

#define T_LOAD_INDIRECT (   DATA_TYPE,
  HEIGHT,
  WIDTH,
  TENSOR_TYPE,
  TENSOR,
  X,
  STRIDE_Y,
  indirect_y,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
dst[_i].v = V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, (indirect_y[_i].v), STRIDE_Y); \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor) using an indirect Y index tile.

Parameters
[in]DATA_TYPEData type
[in]HEIGHTNumber of dst rows
[in]WIDTHNumber of dst columns
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]XStarting X position
[in]STRIDE_YStride Y (in bytes)
[in]indirect_yIndirect Y index tile
[out]dstOutput tile

Definition at line 497 of file tile_helpers.h.

◆ T_LOAD_INDIRECT_WIDTH_SELECT

#define T_LOAD_INDIRECT_WIDTH_SELECT (   DATA_TYPE,
  HEIGHT,
  WIDTH0,
  WIDTH1,
  TENSOR_TYPE,
  TENSOR,
  X,
  STRIDE_Y,
  WIDTH1_CONDITION,
  dst,
  indirect_y 
)
Value:
({ \
if(WIDTH1_CONDITION) \
{ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
VLOAD_PARTIAL(WIDTH0, WIDTH1) \
(dst[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
}) \
} \
else \
{ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
dst[HEIGHT - 1 - _i].v = V_LOAD(DATA_TYPE, WIDTH0, TENSOR_TYPE, TENSOR, X, (indirect_y[HEIGHT - 1 - _i].v), STRIDE_Y); \
}) \
} \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load.

Note
If WIDTH1_CONDITION is true, the load will use the WIDTH1 length
The vectors are loaded in reverse order so the invalid rows are overwritten by the valid ones
Parameters
[in]DATA_TYPEData type
[in]HEIGHTNumber of dst rows
[in]WIDTH0Load width to use if WIDTH1_CONDITION = false
[in]WIDTH1Load width to use if WIDTH1_CONDITION = true
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]XStarting X position
[in]STRIDE_YStride Y (in bytes) used to load each row.
[in]WIDTH1_CONDITIONCondition to select the WIDTH1 load
[out]dstOutput tile
[in]indirect_yIndirect Y index tile

Definition at line 523 of file tile_helpers.h.

◆ T_LOAD_NDHWC_INDIRECT

#define T_LOAD_NDHWC_INDIRECT (   DATA_TYPE,
  TILE_AREA,
  TILE_CHANNELS,
  TENSOR_TYPE,
  TENSOR,
  B,
  Z,
  Y,
  X,
  C,
  TENSOR_WIDTH,
  TENSOR_HEIGHT,
  TENSOR_DEPTH,
  STRIDE_Y,
  xi,
  yi,
  zi,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
{ \
int _src_y = (X) + xi[_i].v + ((Y) + yi[_i].v) * (TENSOR_WIDTH) + ((Z) + zi[_i].v) * (TENSOR_WIDTH * TENSOR_HEIGHT); \
_src_y += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT) * (int)(TENSOR_DEPTH); \
int _src_valid_y = (((X) + xi[_i].v) >= 0 && ((X) + xi[_i].v) < (int)(TENSOR_WIDTH) && ((Y) + yi[_i].v) >= 0 && ((Y) + yi[_i].v) < (int)(TENSOR_HEIGHT) \
&& ((Z) + zi[_i].v) >= 0 && ((Z) + zi[_i].v) < (int)(TENSOR_DEPTH)); \
if(_src_valid_y != 0) \
{ \
dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y); \
} \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates.

Parameters
[in]DATA_TYPEData type
[in]TILE_AREANumber of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
[in]TILE_CHANNELSNumber of elements to load from C (channel) dimension
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]BStarting batch index
[in]ZStarting Z index
[in]YStarting Y index
[in]XStarting X index
[in]CStarting C index
[in]TENSOR_WIDTHNumber of elements to load from X (width) dimension
[in]TENSOR_HEIGHTNumber of elements to load from Y (height) dimension
[in]TENSOR_DEPTHNumber of elements to load from Z (depth) dimension
[in]STRIDE_YStride Y (in bytes)
[in]xiA tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect X coordinate
[in]yiA tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Y coordinate
[in]ziA tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Z coordinate
[out]dstOutput tile

Definition at line 689 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc().

◆ T_LOAD_NHWC

#define T_LOAD_NHWC (   DATA_TYPE,
  TILE_HEIGHT,
  TILE_WIDTH,
  TILE_CHANNELS,
  TENSOR_TYPE,
  TENSOR,
  B,
  Y,
  X,
  C,
  TENSOR_WIDTH,
  TENSOR_HEIGHT,
  STRIDE_Y,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _yk, 0, 1, TILE_HEIGHT, \
{ \
LOOP_UNROLLING(int, _xk, 0, 1, TILE_WIDTH, \
{ \
int _src_y = (X) + _xk + ((Y) + _yk) * (TENSOR_WIDTH); \
_src_y += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT); \
int _src_valid_y = (((X) + _xk) >= 0 && ((X) + _xk) < (int)(TENSOR_WIDTH) && ((Y) + _yk) >= 0 && ((Y) + _yk) < (int)(TENSOR_HEIGHT)); \
if(_src_valid_y != 0) \
{ \
dst[_xk + _yk * (TILE_WIDTH)].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y); \
} \
}) \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout.

Parameters
[in]DATA_TYPEData type
[in]TILE_HEIGHTNumber of elements to load from Y (height) dimension
[in]TILE_WIDTHNumber of elements to load from X (width) dimension
[in]TILE_CHANNELSNumber of elements to load from C (channel) dimension
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]BStarting batch index
[in]YStarting Y index
[in]XStarting X index
[in]CStarting C index
[in]TENSOR_HEIGHTNumber of elements to load from Y (height) dimension
[in]TENSOR_WIDTHNumber of elements to load from X (width) dimension
[in]STRIDE_YStride Y (in bytes)
[out]dstOutput tile

Definition at line 559 of file tile_helpers.h.

◆ T_LOAD_NHWC_INDIRECT

#define T_LOAD_NHWC_INDIRECT (   DATA_TYPE,
  TILE_AREA,
  TILE_CHANNELS,
  TENSOR_TYPE,
  TENSOR,
  B,
  Y,
  X,
  C,
  TENSOR_WIDTH,
  TENSOR_HEIGHT,
  STRIDE_Y,
  xi,
  yi,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
{ \
int _src_y = (X) + xi[_i].v + ((Y) + yi[_i].v) * (TENSOR_WIDTH); \
_src_y += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT); \
int _src_valid_y = (((X) + xi[_i].v) >= 0 && ((X) + xi[_i].v) < (int)(TENSOR_WIDTH) && ((Y) + yi[_i].v) >= 0 && ((Y) + yi[_i].v) < (int)(TENSOR_HEIGHT)); \
if(_src_valid_y != 0) \
{ \
dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y); \
} \
}) \
})
#define V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
Load a vector from global memory (tensor)
Definition: tile_helpers.h:453
#define X(model)
Definition: CPPTypes.h:60

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout using indirect X and Y coordinates.

Parameters
[in]DATA_TYPEData type
[in]TILE_AREANumber of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
[in]TILE_CHANNELSNumber of elements to load from C (channel) dimension
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]BStarting batch index
[in]YStarting Y index
[in]XStarting X index
[in]CStarting C index
[in]TENSOR_WIDTHNumber of elements to load from X (width) dimension
[in]TENSOR_HEIGHTNumber of elements to load from Y (height) dimension
[in]STRIDE_YStride Y (in bytes)
[in]xiA tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect X coordinate
[in]yiA tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Y coordinate
[out]dstOutput tile

Definition at line 642 of file tile_helpers.h.

Referenced by direct_convolution_nhwc().

◆ T_LOAD_NHWC_WITH_DILATION

#define T_LOAD_NHWC_WITH_DILATION (   DATA_TYPE,
  TILE_HEIGHT,
  TILE_WIDTH,
  TILE_CHANNELS,
  TENSOR_TYPE,
  TENSOR,
  B,
  Y,
  X,
  C,
  TENSOR_WIDTH,
  TENSOR_HEIGHT,
  DILATION_X,
  DILATION_Y,
  BOUNDARY_CHECK,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _yk, 0, 1, TILE_HEIGHT, \
{ \
LOOP_UNROLLING(int, _xk, 0, 1, TILE_WIDTH, \
{ \
int _src_y = (X) + _xk * (DILATION_X); \
int _src_z = ((Y) + _yk * (DILATION_Y)); \
int _src_w = (B); \
bool _src_valid_y = (((X) + _xk * (DILATION_X)) >= 0) && (((X) + _xk * (DILATION_X)) < (int)(TENSOR_WIDTH)) && (((Y) + _yk * (DILATION_Y)) >= 0) && (((Y) + _yk * (DILATION_Y)) < (int)(TENSOR_HEIGHT)); \
if(!(BOUNDARY_CHECK)) \
{ \
dst[_xk + _yk * (TILE_WIDTH)].v = VLOAD(TILE_CHANNELS) \
(0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (C) * sizeof(DATA_TYPE) + (_src_y) * (TENSOR##_stride_y) + (_src_z) * (TENSOR##_stride_z) + (_src_w) * (TENSOR##_stride_w))); \
} \
else \
{ \
if(_src_valid_y) \
{ \
dst[_xk + _yk * (TILE_WIDTH)].v = VLOAD(TILE_CHANNELS) \
(0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (C) * sizeof(DATA_TYPE) + (_src_y) * (TENSOR##_stride_y) + (_src_z) * (TENSOR##_stride_z) + (_src_w) * (TENSOR##_stride_w))); \
} \
} \
}) \
}) \
})
#define X(model)
Definition: CPPTypes.h:60
#define VLOAD(size)
Definition: helpers.h:204

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout with dilation for the X and Y increments.

Parameters
[in]DATA_TYPEData type
[in]TILE_HEIGHTNumber of elements to load from Y (height) dimension
[in]TILE_WIDTHNumber of elements to load from X (width) dimension
[in]TILE_CHANNELSNumber of elements to load from C (channel) dimension
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]BStarting batch index
[in]YStarting Y index
[in]XStarting X index
[in]CStarting C index
[in]TENSOR_HEIGHTNumber of elements to load from Y (height) dimension
[in]TENSOR_WIDTHNumber of elements to load from X (width) dimension
[in]DILATION_XDilation for the X increment
[in]DILATION_YDilation for the Y increment
[in]BOUNDARY_CHECKBoundary check flag. If true, it checks for any out-of-bound reads
[out]dstOutput tile

Definition at line 596 of file tile_helpers.h.

◆ T_MMUL

#define T_MMUL (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  LHS_LAYOUT,
  RHS_LAYOUT,
  lhs,
  rhs,
  dst 
)    T_MMUL_##LHS_LAYOUT##_##RHS_LAYOUT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Matrix multiplication.

Note
Performs: LHS X RHS + DST = DST
Parameters
[in]LHS_DATA_TYPELHS tile data type
[in]RHS_DATA_TYPERHS tile data type
[in]DST_DATA_TYPEDST tile data type
[in]M0Number of LHS rows
[in]N0Number of RHS columns
[in]K0Number of LHS columns
[in]LHS_LAYOUTLHS layout (T= transposed, NT= not transposed)
[in]RHS_LAYOUTRHS layout (T= transposed, NT= not transposed)
[in]lhsLHS tile
[in]rhsRHS tile
[in,out]dstDST tile

Definition at line 1149 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ T_MMUL_NT_T

#define T_MMUL_NT_T (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1150 of file tile_helpers.h.

◆ T_MMUL_NT_T_char_char_int

#define T_MMUL_NT_T_char_char_int (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1154 of file tile_helpers.h.

◆ T_MMUL_NT_T_FLOAT

#define T_MMUL_NT_T_FLOAT (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)
Value:
{ \
LOOP_UNROLLING(int, _m, 0, 1, M0, \
{ \
LOOP_UNROLLING(int, _n, 0, 1, N0, \
{ \
LOOP_UNROLLING(int, _k, 0, 1, K0, \
{ \
dst[_m].s[_n] = fma((DST_DATA_TYPE)(lhs[_m].s[_k]), (DST_DATA_TYPE)(rhs[_n].s[_k]), dst[_m].s[_n]); \
}) \
}) \
}) \
}
T fma(T x, T y, T z)
Computes (x*y) + z as if to infinite precision and rounded only once to fit the result type...

Definition at line 1157 of file tile_helpers.h.

◆ T_MMUL_NT_T_float_float_float

#define T_MMUL_NT_T_float_float_float (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1151 of file tile_helpers.h.

◆ T_MMUL_NT_T_half_half_float

#define T_MMUL_NT_T_half_half_float (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1152 of file tile_helpers.h.

◆ T_MMUL_NT_T_half_half_half

#define T_MMUL_NT_T_half_half_half (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1153 of file tile_helpers.h.

◆ T_MMUL_NT_T_INTEGER8

#define T_MMUL_NT_T_INTEGER8 (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m, 0, 1, M0, \
{ \
LOOP_UNROLLING(int, _n, 0, 1, N0, \
{ \
DOT_PRODUCT_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, K0, (lhs[_m].v), (rhs[_n].v), dst[_m].s[_n]); \
}) \
}) \
})

Definition at line 1171 of file tile_helpers.h.

◆ T_MMUL_NT_T_uchar_uchar_int

#define T_MMUL_NT_T_uchar_uchar_int (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1156 of file tile_helpers.h.

◆ T_MMUL_NT_T_uchar_uchar_uint

#define T_MMUL_NT_T_uchar_uchar_uint (   LHS_DATA_TYPE,
  RHS_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  K0,
  lhs,
  rhs,
  dst 
)    T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1155 of file tile_helpers.h.

◆ T_OFFSET_CORRECTION

#define T_OFFSET_CORRECTION (   ACC_DATA_TYPE,
  M0,
  N0,
  K0,
  SRC_OFFSET,
  WEI_OFFSET,
  lhs,
  rhs,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
ACC_DATA_TYPE _tm = 0; \
LOOP_UNROLLING(int, _k0, 0, 1, K0, \
{ \
_tm += ((ACC_DATA_TYPE)lhs[_m0].s[_k0] * (ACC_DATA_TYPE)WEI_OFFSET); \
}) \
LOOP_UNROLLING(int, _n0, 0, 1, N0, \
{ \
dst[_m0].s[_n0] += _tm; \
LOOP_UNROLLING(int, _k0, 0, 1, K0, \
{ \
dst[_m0].s[_n0] += ((ACC_DATA_TYPE)rhs[_n0].s[_k0] * (ACC_DATA_TYPE)SRC_OFFSET); \
}) \
}) \
}) \
})
#define LOOP_UNROLLING(type, idx, start, step, num, macro)
Definition: tile_helpers.h:304

Offset correction for the QASYMM8 computation.

Parameters
[in]ACC_DATA_TYPEAccumulator data type
[in]M0Number of src/dst rows
[in]N0Number of src/dst columns
[in]K0Number of src columns
[in]SRC_OFFSETSource quantization offset
[in]WEI_OFFSETWeights quantization offset
[in]lhsLHS tile
[in]rhsRHS tile
[out]dstDST tile

Definition at line 754 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ T_QUANTIZE8

#define T_QUANTIZE8 (   SRC_DATA_TYPE,
  DST_DATA_TYPE,
  QUANTIZATION_TYPE,
  M0,
  N0,
  DST_OFFSET,
  DST_SHIFT,
  DST_MULTIPLIER,
  src,
  dst_multipliers,
  dst_shifts,
  dst 
)    T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)

8-bit quantization with fixed-point scale

Parameters
[in]SRC_DATA_TYPESRC data type
[in]DST_DATA_TYPEDST data type
[in]QUANTIZATION_TYPEQuantization type (PER_TENSOR or PER_CHANNEL)
[in]M0Number of src/dst rows
[in]N0Number of src/dst columns
[in]DST_OFFSETQuantization offset used for both the per-tensor and per-channel quantization
[in]DST_SHIFTQuantization shift for the per-tensor quantization
[in]DST_MULTIPLIERQuantization multiplier for the per-tensor quantization
[in]srcInput tile
[in]dst_multipliersOutput multipliers tile for the per-channel quantization
[in]dst_shiftsOutput shift tile for the per-channel quantization
[out]dstOutput tile

Definition at line 789 of file tile_helpers.h.

◆ T_QUANTIZE8_ASYMMETRIC

#define T_QUANTIZE8_ASYMMETRIC (   SRC_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  DST_OFFSET,
  DST_SHIFT,
  DST_MULTIPLIER,
  src,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
LOOP_UNROLLING(int, _n0, 0, 1, N0, \
{ \
SRC_DATA_TYPE _tmp = 0; \
SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
_src *= select((SRC_DATA_TYPE)1, ((SRC_DATA_TYPE)1 << (SRC_DATA_TYPE)(-DST_SHIFT)), ((SRC_DATA_TYPE)DST_SHIFT < (SRC_DATA_TYPE)0)); \
SRC_DATA_TYPE overflow = _src == DST_MULTIPLIER && _src == INT_MIN; \
long a_64 = (long)(_src); \
long b_64 = (long)(DST_MULTIPLIER); \
long ab_64 = a_64 * b_64; \
long mask1 = 1 << 30; \
long mask2 = 1 - (1 << 30); \
long is_positive_or_zero = ab_64 >= 0; \
long nudge = select(mask2, mask1, is_positive_or_zero); \
SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
_tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
if(DST_SHIFT >= 0) \
{ \
long mask = ((((int)1) << DST_SHIFT) - (int)1); \
long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
_tmp = (_tmp & mask) > threshold ? (_tmp >> DST_SHIFT) + (int)1 : (_tmp >> DST_SHIFT); \
} \
_tmp += DST_OFFSET; \
dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE); \
}) \
}) \
})
#define CONVERT(x, type)
Definition: helpers.h:731
SimpleTensor< float > src
Definition: DFT.cpp:155
#define CONVERT_SAT(x, type)
Definition: helpers.h:734
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
Definition: Select.cpp:38

Quantize the tile to 8 bits with a fixed-point scale (asymmetric quantization).

Parameters
[in]SRC_DATA_TYPESRC data type
[in]DST_DATA_TYPEDST data type
[in]M0Number of src/dst rows
[in]N0Number of src/dst columns
[in]DST_OFFSETQuantization offset used for both the per-tensor and per-channel quantization
[in]DST_SHIFTQuantization shift for the per-tensor quantization
[in]DST_MULTIPLIERQuantization multiplier for the per-tensor quantization
[in]srcInput tile
[out]dstOutput tile

Definition at line 896 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ T_QUANTIZE8_PER_CHANNEL

#define T_QUANTIZE8_PER_CHANNEL (   SRC_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  DST_OFFSET,
  DST_SHIFT,
  DST_MULTIPLIER,
  src,
  dst_multipliers,
  dst_shifts,
  dst 
)

8-bit per-channel quantization with fixed-point scale

Parameters
[in]SRC_DATA_TYPESRC data type
[in]DST_DATA_TYPEDST data type
[in]M0Number of src/dst rows
[in]N0Number of src/dst columns
[in]DST_OFFSETQuantization offset
[in]DST_SHIFT(unused)
[in]DST_MULTIPLIER(unused)
[in]srcInput tile
[in]dst_multipliersOutput multipliers tile for the per-channel quantization
[in]dst_shiftsOutput shift tile for the per-channel quantization
[out]dstOutput tile

Definition at line 851 of file tile_helpers.h.

◆ T_QUANTIZE8_PER_TENSOR

#define T_QUANTIZE8_PER_TENSOR (   SRC_DATA_TYPE,
  DST_DATA_TYPE,
  M0,
  N0,
  DST_OFFSET,
  DST_SHIFT,
  DST_MULTIPLIER,
  src,
  dst_multipliers,
  dst_shifts,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
LOOP_UNROLLING(int, _n0, 0, 1, N0, \
{ \
SRC_DATA_TYPE _tmp = 0; \
SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
_src *= select((SRC_DATA_TYPE)1, ((SRC_DATA_TYPE)1 << (SRC_DATA_TYPE)(-DST_SHIFT)), ((SRC_DATA_TYPE)DST_SHIFT < (SRC_DATA_TYPE)0)); \
SRC_DATA_TYPE overflow = _src == DST_MULTIPLIER && _src == INT_MIN; \
long a_64 = (long)(_src); \
long b_64 = (long)(DST_MULTIPLIER); \
long ab_64 = a_64 * b_64; \
long mask1 = 1 << 30; \
long mask2 = 1 - (1 << 30); \
long is_positive_or_zero = ab_64 >= 0; \
long nudge = select(mask2, mask1, is_positive_or_zero); \
SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
_tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
if(DST_SHIFT >= 0) \
{ \
long mask = ((((int)1) << DST_SHIFT) - (long)1); \
long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
_tmp = (_tmp & mask) > threshold ? (_tmp >> DST_SHIFT) + (int)1 : (_tmp >> DST_SHIFT); \
} \
_tmp += DST_OFFSET; \
dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE); \
}) \
}) \
})
#define CONVERT(x, type)
Definition: helpers.h:731
SimpleTensor< float > src
Definition: DFT.cpp:155
#define CONVERT_SAT(x, type)
Definition: helpers.h:734
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
Definition: Select.cpp:38

8-bit per-tensor quantization with fixed-point scale

Parameters
[in]SRC_DATA_TYPESRC data type
[in]DST_DATA_TYPEDST data type
[in]M0Number of src/dst rows
[in]N0Number of src/dst columns
[in]DST_OFFSETQuantization offset
[in]DST_SHIFTQuantization shift for the per-tensor quantization
[in]DST_MULTIPLIERQuantization multiplier for the per-tensor quantization
[in]srcInput tile
[in]dst_multipliers(unused)
[in]dst_shifts(unused)
[out]dstOutput tile

Definition at line 806 of file tile_helpers.h.

◆ T_QUANTIZE8_STR

#define T_QUANTIZE8_STR (   SRC_DATA_TYPE,
  DST_DATA_TYPE,
  QUANTIZATION_TYPE,
  M0,
  N0,
  DST_OFFSET,
  DST_SHIFT,
  DST_MULTIPLIER,
  src,
  dst_multipliers,
  dst_shifts,
  dst 
)    T_QUANTIZE8_##QUANTIZATION_TYPE(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)

Definition at line 790 of file tile_helpers.h.

◆ T_ROWSET_MASK

#define T_ROWSET_MASK (   DATA_TYPE,
  M0,
  N0,
  VALUE_TO_SET,
  a,
  mask 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
LOOP_UNROLLING(int, _n0, 0, 1, N0, \
{ \
a[_m0].s[_n0] = select((DATA_TYPE)(a[_m0].s[_n0]), (DATA_TYPE)(VALUE_TO_SET), (SELECT_DATA_TYPE(DATA_TYPE))(mask[_m0].v == (DATA_TYPE)0)); \
}) \
}) \
})
#define SELECT_DATA_TYPE(type)
Definition: helpers.h:752
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
Definition: Select.cpp:38

Conditional rowset (memset by row)

Note
Set the row to VALUE_TO_SET if the corresponding mask == 0
Parameters
[in]DATA_TYPEData type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]VALUE_TO_SETValue to set the row
[in,out]aInput/output tile
[in]maskMask to check for setting the row to VALUE_TO_SET

Definition at line 938 of file tile_helpers.h.

◆ T_SCALE_CONSTANT

#define T_SCALE_CONSTANT (   DATA_TYPE,
  M0,
  N0,
  lhs,
  rhs_constant,
  dst 
)
Value:
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
dst[_m0].v = lhs[_m0].v * (DATA_TYPE)rhs_constant; \
}) \
})

Element-wise scale with a constant value.

Note
Performs: LHS * constant = DST
Parameters
[in]DATA_TYPELHS/RHS/DST data type
[in]M0Number of LHS rows
[in]N0Number of LHS columns
[in]lhsLHS tile
[in]rhs_constantConstant value
[out]dstDST tile

Definition at line 1061 of file tile_helpers.h.

◆ T_STORE_INDIRECT_WIDTH_SELECT

#define T_STORE_INDIRECT_WIDTH_SELECT (   DATA_TYPE,
  HEIGHT,
  WIDTH0,
  WIDTH1,
  TENSOR_TYPE,
  TENSOR,
  X,
  STRIDE_Y,
  WIDTH1_CONDITION,
  src,
  indirect_y 
)
Value:
({ \
if(WIDTH1_CONDITION) \
{ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
VSTORE_PARTIAL(WIDTH0, WIDTH1) \
(CONVERT(src[HEIGHT - 1 - _i].v, VEC_DATA_TYPE(DATA_TYPE, WIDTH0)), 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
}) \
} \
else \
{ \
LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
{ \
VSTORE(WIDTH0) \
(CONVERT(src[HEIGHT - 1 - _i].v, VEC_DATA_TYPE(DATA_TYPE, WIDTH0)), 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
}) \
} \
})
#define CONVERT(x, type)
Definition: helpers.h:731
SimpleTensor< float > src
Definition: DFT.cpp:155
#define X(model)
Definition: CPPTypes.h:60
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

Store a tile to global memory (tensor) using an indirect Y index tile and conditionally use a different length for the store.

Note
If WIDTH1_CONDITION is true, the store uses the WIDTH1 length; otherwise it uses the WIDTH0 length
The vectors are stored in reverse order so the invalid rows are overwritten by the valid ones
Parameters
[in]DATA_TYPEData type
[in]HEIGHTNumber of src rows
[in]WIDTH0Store width to use if WIDTH1_CONDITION = false
[in]WIDTH1Store width to use if WIDTH1_CONDITION = true
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported; cl_image is not supported.
[in]TENSORTensor basename
[in]XStarting X position
[in]STRIDE_YStride Y (in bytes)
[in]WIDTH1_CONDITIONCondition to select the WIDTH1 store
[in]srcInput tile
[in]indirect_yIndirect Y index tile

Definition at line 722 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), and direct_convolution_nhwc().

◆ TENSOR3D_T

#define TENSOR3D_T (   name,
  type 
)    TENSOR3D_T_STR(name, type)

Definition at line 155 of file tile_helpers.h.

◆ TENSOR3D_T_BUFFER

#define TENSOR3D_T_BUFFER (   name)
Value:
__global uchar *name##_ptr, \
uint name##_stride_y, \
uint name##_stride_z, \
uint name##_w, \
uint name##_h, \
uint name##_n, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 145 of file tile_helpers.h.

◆ TENSOR3D_T_IMAGE

#define TENSOR3D_T_IMAGE (   name)
Value:
__read_only image2d_t name##_img, \
__global uchar *name##_ptr, \
uint name##_stride_y, \
uint name##_stride_z, \
uint name##_w, \
uint name##_h, \
uint name##_n, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 135 of file tile_helpers.h.

◆ TENSOR3D_T_STR

#define TENSOR3D_T_STR (   name,
  type 
)    TENSOR3D_T_##type(name)

Definition at line 154 of file tile_helpers.h.

◆ TENSOR4D

#define TENSOR4D (   name,
  type 
)    TENSOR4D_STR(name, type)

Definition at line 107 of file tile_helpers.h.

◆ TENSOR4D_BUFFER

#define TENSOR4D_BUFFER (   name)
Value:
__global uchar *name##_ptr, \
uint name##_stride_x, \
uint name##_step_x, \
uint name##_stride_y, \
uint name##_step_y, \
uint name##_stride_z, \
uint name##_step_z, \
uint name##_stride_w, \
uint name##_step_w, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 94 of file tile_helpers.h.

◆ TENSOR4D_IMAGE

#define TENSOR4D_IMAGE (   name)
Value:
__read_only image2d_t name##_img, \
__global uchar *name##_ptr, \
uint name##_stride_x, \
uint name##_step_x, \
uint name##_stride_y, \
uint name##_step_y, \
uint name##_stride_z, \
uint name##_step_z, \
uint name##_stride_w, \
uint name##_step_w, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 81 of file tile_helpers.h.

◆ TENSOR4D_STR

#define TENSOR4D_STR (   name,
  type 
)    TENSOR4D_##type(name)

Definition at line 106 of file tile_helpers.h.

◆ TENSOR4D_T

#define TENSOR4D_T (   name,
  type 
)    TENSOR4D_T_STR(name, type)

Definition at line 133 of file tile_helpers.h.

◆ TENSOR4D_T_BUFFER

#define TENSOR4D_T_BUFFER (   name)
Value:
__global uchar *name##_ptr, \
uint name##_stride_y, \
uint name##_stride_z, \
uint name##_stride_w, \
uint name##_c, \
uint name##_w, \
uint name##_h, \
uint name##_n, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 121 of file tile_helpers.h.

◆ TENSOR4D_T_IMAGE

#define TENSOR4D_T_IMAGE (   name)
Value:
__read_only image2d_t name##_img, \
__global uchar *name##_ptr, \
uint name##_stride_y, \
uint name##_stride_z, \
uint name##_stride_w, \
uint name##_c, \
uint name##_w, \
uint name##_h, \
uint name##_n, \
uint name##_offset_first_element_in_bytes
const char * name

Definition at line 109 of file tile_helpers.h.

◆ TENSOR4D_T_STR

#define TENSOR4D_T_STR (   name,
  type 
)    TENSOR4D_T_##type(name)

Definition at line 132 of file tile_helpers.h.

◆ TILE

#define TILE (   DATA_TYPE,
  H,
  W,
  BASENAME 
)    TILE_STR(DATA_TYPE, H, W, BASENAME)

Tile object. A tile object is a 2D memory block and can be accessed using the following syntax:

  1. a[m0].v = access the vector at row "m0" (OpenCL vector)
  2. a[m0].s[n0] = access the scalar element at row "m0" and column "n0" (scalar access)

Parameters
[in]DATA_TYPEData type of the tile
[in]HNumber of tile rows
[in]WNumber of tile columns
[in]BASENAMETile's name

Definition at line 74 of file tile_helpers.h.

Referenced by direct_convolution3d_ndhwc(), direct_convolution_nhwc(), scale_bilinear_nchw(), and scale_nearest_neighbour_nchw().

◆ TILE_STR

#define TILE_STR (   DATA_TYPE,
  H,
  W,
  BASENAME 
)
Value:
union { \
DATA_TYPE s[TILE_VECTOR_SIZE##W]; \
TILE_VECTOR_TYPE##W(DATA_TYPE) v; \
} BASENAME[H]

Definition at line 75 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE1

#define TILE_VECTOR_SIZE1   1

Definition at line 30 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE10

#define TILE_VECTOR_SIZE10   16

Definition at line 39 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE11

#define TILE_VECTOR_SIZE11   16

Definition at line 40 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE12

#define TILE_VECTOR_SIZE12   16

Definition at line 41 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE13

#define TILE_VECTOR_SIZE13   16

Definition at line 42 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE14

#define TILE_VECTOR_SIZE14   16

Definition at line 43 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE15

#define TILE_VECTOR_SIZE15   16

Definition at line 44 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE16

#define TILE_VECTOR_SIZE16   16

Definition at line 45 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE2

#define TILE_VECTOR_SIZE2   2

Definition at line 31 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE3

#define TILE_VECTOR_SIZE3   3

Definition at line 32 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE4

#define TILE_VECTOR_SIZE4   4

Definition at line 33 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE5

#define TILE_VECTOR_SIZE5   8

Definition at line 34 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE6

#define TILE_VECTOR_SIZE6   8

Definition at line 35 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE7

#define TILE_VECTOR_SIZE7   8

Definition at line 36 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE8

#define TILE_VECTOR_SIZE8   8

Definition at line 37 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE9

#define TILE_VECTOR_SIZE9   16

Definition at line 38 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE1

#define TILE_VECTOR_TYPE1 (   DATA_TYPE)    DATA_TYPE##1

Definition at line 47 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE10

#define TILE_VECTOR_TYPE10 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 56 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE11

#define TILE_VECTOR_TYPE11 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 57 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE12

#define TILE_VECTOR_TYPE12 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 58 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE13

#define TILE_VECTOR_TYPE13 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 59 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE14

#define TILE_VECTOR_TYPE14 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 60 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE15

#define TILE_VECTOR_TYPE15 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 61 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE16

#define TILE_VECTOR_TYPE16 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 62 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE2

#define TILE_VECTOR_TYPE2 (   DATA_TYPE)    DATA_TYPE##2

Definition at line 48 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE3

#define TILE_VECTOR_TYPE3 (   DATA_TYPE)    DATA_TYPE##3

Definition at line 49 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE4

#define TILE_VECTOR_TYPE4 (   DATA_TYPE)    DATA_TYPE##4

Definition at line 50 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE5

#define TILE_VECTOR_TYPE5 (   DATA_TYPE)    DATA_TYPE##8

Definition at line 51 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE6

#define TILE_VECTOR_TYPE6 (   DATA_TYPE)    DATA_TYPE##8

Definition at line 52 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE7

#define TILE_VECTOR_TYPE7 (   DATA_TYPE)    DATA_TYPE##8

Definition at line 53 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE8

#define TILE_VECTOR_TYPE8 (   DATA_TYPE)    DATA_TYPE##8

Definition at line 54 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE9

#define TILE_VECTOR_TYPE9 (   DATA_TYPE)    DATA_TYPE##16

Definition at line 55 of file tile_helpers.h.

◆ UNROLL_INCR

#define UNROLL_INCR (   idx,
  step,
  macro 
)    idx += (step); (macro)

Definition at line 158 of file tile_helpers.h.

◆ V_ADD

#define V_ADD (   A_VAL,
  B_VAL 
)    ((A_VAL) + (B_VAL))

Definition at line 984 of file tile_helpers.h.

◆ V_DIV

#define V_DIV (   A_VAL,
  B_VAL 
)    ((A_VAL) / (B_VAL))

Definition at line 985 of file tile_helpers.h.

◆ V_LOAD

#define V_LOAD (   DATA_TYPE,
  WIDTH,
  TENSOR_TYPE,
  TENSOR,
  X,
  Y,
  STRIDE_Y 
)    V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)

Load a vector from global memory (tensor)

Parameters
[in]DATA_TYPEData type
[in]WIDTHNumber of dst columns
[in]TENSOR_TYPEType of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]TENSORTensor basename
[in]XStarting X position
[in]YStarting Y position
[in]STRIDE_YStride Y (in bytes)

Definition at line 453 of file tile_helpers.h.

◆ V_LOAD_BUFFER

#define V_LOAD_BUFFER (   DATA_TYPE,
  WIDTH,
  TENSOR,
  X,
  Y,
  STRIDE_Y 
)
Value:
VLOAD(WIDTH) \
(0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y)))
#define X(model)
Definition: CPPTypes.h:60
#define VLOAD(size)
Definition: helpers.h:204

Definition at line 455 of file tile_helpers.h.

◆ V_LOAD_IMAGE

#define V_LOAD_IMAGE (   DATA_TYPE,
  WIDTH,
  TENSOR,
  X,
  Y,
  STRIDE_Y 
)    READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))

Definition at line 458 of file tile_helpers.h.

◆ V_LOAD_STR

#define V_LOAD_STR (   DATA_TYPE,
  WIDTH,
  TENSOR_TYPE,
  TENSOR,
  X,
  Y,
  STRIDE_Y 
)    V_LOAD_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)

Definition at line 454 of file tile_helpers.h.