Macros
#define	TILE_VECTOR_SIZE1 1

#define	TILE_VECTOR_SIZE2 2

#define	TILE_VECTOR_SIZE3 3

#define	TILE_VECTOR_SIZE4 4

#define	TILE_VECTOR_SIZE5 8

#define	TILE_VECTOR_SIZE6 8

#define	TILE_VECTOR_SIZE7 8

#define	TILE_VECTOR_SIZE8 8

#define	TILE_VECTOR_SIZE9 16

#define	TILE_VECTOR_SIZE10 16

#define	TILE_VECTOR_SIZE11 16

#define	TILE_VECTOR_SIZE12 16

#define	TILE_VECTOR_SIZE13 16

#define	TILE_VECTOR_SIZE14 16

#define	TILE_VECTOR_SIZE15 16

#define	TILE_VECTOR_SIZE16 16

#define	TILE_VECTOR_TYPE1(DATA_TYPE) DATA_TYPE##1

#define	TILE_VECTOR_TYPE2(DATA_TYPE) DATA_TYPE##2

#define	TILE_VECTOR_TYPE3(DATA_TYPE) DATA_TYPE##3

#define	TILE_VECTOR_TYPE4(DATA_TYPE) DATA_TYPE##4

#define	TILE_VECTOR_TYPE5(DATA_TYPE) DATA_TYPE##8

#define	TILE_VECTOR_TYPE6(DATA_TYPE) DATA_TYPE##8

#define	TILE_VECTOR_TYPE7(DATA_TYPE) DATA_TYPE##8

#define	TILE_VECTOR_TYPE8(DATA_TYPE) DATA_TYPE##8

#define	TILE_VECTOR_TYPE9(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE10(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE11(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE12(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE13(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE14(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE15(DATA_TYPE) DATA_TYPE##16

#define	TILE_VECTOR_TYPE16(DATA_TYPE) DATA_TYPE##16

#define	TILE(DATA_TYPE, H, W, BASENAME) TILE_STR(DATA_TYPE, H, W, BASENAME)
	Tile object. A tile object is a 2D memory block and can be accessed using the following syntax: More...

#define	TILE_STR(DATA_TYPE, H, W, BASENAME)

#define	TENSOR4D_IMAGE(name)

#define	TENSOR4D_BUFFER(name)

#define	TENSOR4D_STR(name, type) TENSOR4D_##type(name)

#define	TENSOR4D(name, type) TENSOR4D_STR(name, type)

#define	TENSOR4D_T_IMAGE(name)

#define	TENSOR4D_T_BUFFER(name)

#define	TENSOR4D_T_STR(name, type) TENSOR4D_T_##type(name)

#define	TENSOR4D_T(name, type) TENSOR4D_T_STR(name, type)
	Legacy tensor 4D arguments. More...

#define	TENSOR4D_RO_T_IMAGE(name)

#define	TENSOR4D_RO_T_BUFFER(name) TENSOR4D_T_BUFFER(name)

#define	TENSOR4D_RO_T_STR(name, type) TENSOR4D_RO_T_##type(name)

#define	TENSOR4D_RO_T(name, type) TENSOR4D_RO_T_STR(name, type)
	Read-Only (RO) tensor 4D. More...

#define	TENSOR4D_WO_T_IMAGE(name)

#define	TENSOR4D_WO_T_BUFFER(name) TENSOR4D_T_BUFFER(name)

#define	TENSOR4D_WO_T_STR(name, type) TENSOR4D_WO_T_##type(name)

#define	TENSOR4D_WO_T(name, type) TENSOR4D_WO_T_STR(name, type)
	Write-Only (WO) tensor 4D. More...

#define	TENSOR3D_T_IMAGE(name)

#define	TENSOR3D_T_BUFFER(name)

#define	TENSOR3D_T_STR(name, type) TENSOR3D_T_##type(name)

#define	TENSOR3D_T(name, type) TENSOR3D_T_STR(name, type)

#define	UNROLL_INCR(idx, step, macro) idx += (step); (macro)

#define	LOOP_UNROLLING_1(idx, step, macro) (macro)

#define	LOOP_UNROLLING_2(idx, step, macro) LOOP_UNROLLING_1(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_3(idx, step, macro) LOOP_UNROLLING_2(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_4(idx, step, macro) LOOP_UNROLLING_3(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_5(idx, step, macro) LOOP_UNROLLING_4(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_6(idx, step, macro) LOOP_UNROLLING_5(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_7(idx, step, macro) LOOP_UNROLLING_6(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_8(idx, step, macro) LOOP_UNROLLING_7(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_9(idx, step, macro) LOOP_UNROLLING_8(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_10(idx, step, macro) LOOP_UNROLLING_9(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_11(idx, step, macro) LOOP_UNROLLING_10(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_12(idx, step, macro) LOOP_UNROLLING_11(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_13(idx, step, macro) LOOP_UNROLLING_12(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_14(idx, step, macro) LOOP_UNROLLING_13(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_15(idx, step, macro) LOOP_UNROLLING_14(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_16(idx, step, macro) LOOP_UNROLLING_15(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_17(idx, step, macro) LOOP_UNROLLING_16(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_18(idx, step, macro) LOOP_UNROLLING_17(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_19(idx, step, macro) LOOP_UNROLLING_18(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_20(idx, step, macro) LOOP_UNROLLING_19(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_21(idx, step, macro) LOOP_UNROLLING_20(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_22(idx, step, macro) LOOP_UNROLLING_21(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_23(idx, step, macro) LOOP_UNROLLING_22(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_24(idx, step, macro) LOOP_UNROLLING_23(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_25(idx, step, macro) LOOP_UNROLLING_24(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_26(idx, step, macro) LOOP_UNROLLING_25(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_27(idx, step, macro) LOOP_UNROLLING_26(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_28(idx, step, macro) LOOP_UNROLLING_27(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_29(idx, step, macro) LOOP_UNROLLING_28(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_30(idx, step, macro) LOOP_UNROLLING_29(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_31(idx, step, macro) LOOP_UNROLLING_30(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_32(idx, step, macro) LOOP_UNROLLING_31(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_33(idx, step, macro) LOOP_UNROLLING_32(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_34(idx, step, macro) LOOP_UNROLLING_33(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_35(idx, step, macro) LOOP_UNROLLING_34(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_36(idx, step, macro) LOOP_UNROLLING_35(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_37(idx, step, macro) LOOP_UNROLLING_36(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_38(idx, step, macro) LOOP_UNROLLING_37(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_39(idx, step, macro) LOOP_UNROLLING_38(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_40(idx, step, macro) LOOP_UNROLLING_39(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_41(idx, step, macro) LOOP_UNROLLING_40(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_42(idx, step, macro) LOOP_UNROLLING_41(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_43(idx, step, macro) LOOP_UNROLLING_42(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_44(idx, step, macro) LOOP_UNROLLING_43(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_45(idx, step, macro) LOOP_UNROLLING_44(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_46(idx, step, macro) LOOP_UNROLLING_45(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_47(idx, step, macro) LOOP_UNROLLING_46(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_48(idx, step, macro) LOOP_UNROLLING_47(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_49(idx, step, macro) LOOP_UNROLLING_48(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_50(idx, step, macro) LOOP_UNROLLING_49(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_51(idx, step, macro) LOOP_UNROLLING_50(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_52(idx, step, macro) LOOP_UNROLLING_51(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_53(idx, step, macro) LOOP_UNROLLING_52(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_54(idx, step, macro) LOOP_UNROLLING_53(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_55(idx, step, macro) LOOP_UNROLLING_54(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_56(idx, step, macro) LOOP_UNROLLING_55(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_57(idx, step, macro) LOOP_UNROLLING_56(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_58(idx, step, macro) LOOP_UNROLLING_57(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_59(idx, step, macro) LOOP_UNROLLING_58(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_60(idx, step, macro) LOOP_UNROLLING_59(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_61(idx, step, macro) LOOP_UNROLLING_60(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_62(idx, step, macro) LOOP_UNROLLING_61(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_63(idx, step, macro) LOOP_UNROLLING_62(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_64(idx, step, macro) LOOP_UNROLLING_63(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_65(idx, step, macro) LOOP_UNROLLING_64(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_66(idx, step, macro) LOOP_UNROLLING_65(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_67(idx, step, macro) LOOP_UNROLLING_66(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_68(idx, step, macro) LOOP_UNROLLING_67(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_69(idx, step, macro) LOOP_UNROLLING_68(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_70(idx, step, macro) LOOP_UNROLLING_69(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_71(idx, step, macro) LOOP_UNROLLING_70(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_72(idx, step, macro) LOOP_UNROLLING_71(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_73(idx, step, macro) LOOP_UNROLLING_72(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_74(idx, step, macro) LOOP_UNROLLING_73(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_75(idx, step, macro) LOOP_UNROLLING_74(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_76(idx, step, macro) LOOP_UNROLLING_75(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_77(idx, step, macro) LOOP_UNROLLING_76(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_78(idx, step, macro) LOOP_UNROLLING_77(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_79(idx, step, macro) LOOP_UNROLLING_78(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_80(idx, step, macro) LOOP_UNROLLING_79(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_81(idx, step, macro) LOOP_UNROLLING_80(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_82(idx, step, macro) LOOP_UNROLLING_81(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_83(idx, step, macro) LOOP_UNROLLING_82(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_84(idx, step, macro) LOOP_UNROLLING_83(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_85(idx, step, macro) LOOP_UNROLLING_84(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_86(idx, step, macro) LOOP_UNROLLING_85(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_87(idx, step, macro) LOOP_UNROLLING_86(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_88(idx, step, macro) LOOP_UNROLLING_87(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_89(idx, step, macro) LOOP_UNROLLING_88(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_90(idx, step, macro) LOOP_UNROLLING_89(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_91(idx, step, macro) LOOP_UNROLLING_90(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_92(idx, step, macro) LOOP_UNROLLING_91(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_93(idx, step, macro) LOOP_UNROLLING_92(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_94(idx, step, macro) LOOP_UNROLLING_93(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_95(idx, step, macro) LOOP_UNROLLING_94(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_96(idx, step, macro) LOOP_UNROLLING_95(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_97(idx, step, macro) LOOP_UNROLLING_96(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_98(idx, step, macro) LOOP_UNROLLING_97(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_99(idx, step, macro) LOOP_UNROLLING_98(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_100(idx, step, macro) LOOP_UNROLLING_99(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_101(idx, step, macro) LOOP_UNROLLING_100(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_102(idx, step, macro) LOOP_UNROLLING_101(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_103(idx, step, macro) LOOP_UNROLLING_102(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_104(idx, step, macro) LOOP_UNROLLING_103(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_105(idx, step, macro) LOOP_UNROLLING_104(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_106(idx, step, macro) LOOP_UNROLLING_105(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_107(idx, step, macro) LOOP_UNROLLING_106(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_108(idx, step, macro) LOOP_UNROLLING_107(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_109(idx, step, macro) LOOP_UNROLLING_108(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_110(idx, step, macro) LOOP_UNROLLING_109(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_111(idx, step, macro) LOOP_UNROLLING_110(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_112(idx, step, macro) LOOP_UNROLLING_111(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_113(idx, step, macro) LOOP_UNROLLING_112(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_114(idx, step, macro) LOOP_UNROLLING_113(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_115(idx, step, macro) LOOP_UNROLLING_114(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_116(idx, step, macro) LOOP_UNROLLING_115(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_117(idx, step, macro) LOOP_UNROLLING_116(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_118(idx, step, macro) LOOP_UNROLLING_117(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_119(idx, step, macro) LOOP_UNROLLING_118(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_120(idx, step, macro) LOOP_UNROLLING_119(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_121(idx, step, macro) LOOP_UNROLLING_120(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_122(idx, step, macro) LOOP_UNROLLING_121(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_123(idx, step, macro) LOOP_UNROLLING_122(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_124(idx, step, macro) LOOP_UNROLLING_123(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_125(idx, step, macro) LOOP_UNROLLING_124(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_126(idx, step, macro) LOOP_UNROLLING_125(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_127(idx, step, macro) LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_128(idx, step, macro) LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)

#define	LOOP_UNROLLING_STR(type, idx, start, step, num, macro)

#define	LOOP_UNROLLING(type, idx, start, step, num, macro) LOOP_UNROLLING_STR(type, idx, start, step, num, macro)

#define	GET_SPATIAL_IDX(IDX, N0, PARTIAL_N0) (max((int)(get_global_id(IDX) * N0 - (N0 - PARTIAL_N0) % N0), 0))
	Get the get_global_id with partial N0. More...

#define	DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c) DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)
	Dot product integer 8-bit function. More...

#define	DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c) DOT_PRODUCT##K0##_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, x, y, val)

#define	DOT_PRODUCT5_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT6_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT7_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT9_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT10_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT11_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT12_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT13_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT14_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT15_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	DOT_PRODUCT16_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

#define	REDUCE_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c) REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)
	Reduction integer 8-bit function, expressed as a dot product with a vector of ones. More...

#define	REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c) DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, (TILE_VECTOR_TYPE##K0(B_DATA_TYPE))1, c)

#define	V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y) V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)
	Load a vector from global memory (tensor). More...

#define	V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y) V_LOAD_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)

#define	V_LOAD_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)

#define	V_LOAD_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y) READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))

#define	V_STORE(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES)
	Store a vector in global memory (tensor). More...

#define	V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES)

#define	V_STORE_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES)

#define	V_STORE_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) WRITE_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y), VALUES)

#define	T_LOAD(DATA_TYPE, HEIGHT, WIDTH, TENSOR_TYPE, TENSOR, X, Y, YI_MULTIPLIER, STRIDE_Y, dst)
	Load a tile from global memory (tensor). More...

#define	COPY_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, WIDTH, COLUMN) COPY_VECTOR_TO_TILE_COLUMN_STR(VECTOR, TILE, WIDTH, COLUMN)
	Store a VECTOR variable into the specified COLUMN of a TILE. More...

#define	COPY_VECTOR_TO_TILE_COLUMN_STR(VECTOR, TILE, WIDTH, COLUMN) COPY_##WIDTH##_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_1_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_2_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_3_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_4_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_8_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	COPY_16_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

#define	T_LOAD_TRANSPOSED(DATA_TYPE, SRC_HEIGHT, SRC_WIDTH, TENSOR_TYPE, TENSOR, X, Y, YI_MULTIPLIER, STRIDE_Y, dst)
	Load SRC_HEIGHT x SRC_WIDTH elements from global memory (tensor), and store them in a SRC_WIDTH x SRC_HEIGHT tile. More...

#define	T_LOAD_INDIRECT(DATA_TYPE, HEIGHT, WIDTH, TENSOR_TYPE, TENSOR, X, STRIDE_Y, indirect_y, dst)
	Load a tile from global memory (tensor) using an indirect Y index tile. More...

#define	T_LOAD_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, dst, indirect_y)
	Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load. More...

#define	T_LOAD_NHWC(DATA_TYPE, TILE_HEIGHT, TILE_WIDTH, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, dst)
	Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout. More...

#define	T_LOAD_NHWC_WITH_DILATION(DATA_TYPE, TILE_HEIGHT, TILE_WIDTH, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, DILATION_X, DILATION_Y, BOUNDARY_CHECK, dst)
	Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout with dilation for the X and Y increments. More...

#define	T_LOAD_NHWC_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, xi, yi, dst)
	Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout using indirect X and Y coordinates. More...

#define	T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)
	Load a tile from global memory (tensor) using an indirect buffer for the Y coordinates. More...

#define	T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_##TENSOR_TYPE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)

#define	T_LOAD2D_INDIRECT_BUFFER(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)

#define	T_LOAD2D_INDIRECT_IMAGE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)

#define	T_LOAD_NDHWC_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Z, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, TENSOR_DEPTH, STRIDE_Y, xi, yi, zi, dst)
	Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates. More...

#define	T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, src, indirect_y)
	Store a tile to global memory (tensor) using an indirect Y index tile and conditionally use a different length for the store. More...

#define	T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, K0, SRC_OFFSET, WEI_OFFSET, lhs, rhs, dst)
	Offset correction for the QASYMM8 computation. More...

#define	T_QUANTIZE8(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst) T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
	8-bit quantization with fixed-point scale. More...

#define	T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst) T_QUANTIZE8_##QUANTIZATION_TYPE(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)

#define	T_QUANTIZE8_PER_TENSOR(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
	8-bit per-tensor quantization with fixed-point scale. More...

#define	T_QUANTIZE8_PER_CHANNEL(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)
	8-bit per-channel quantization with fixed-point scale. More...

#define	T_QUANTIZE8_ASYMMETRIC(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst)
	Quantize the 8-bit tile with fixed-point scale for asymmetric quantization. More...

#define	T_ROWSET_MASK(DATA_TYPE, M0, N0, VALUE_TO_SET, a, mask)
	Conditional rowset (memset by row). More...

#define	T_ACTIVATION(DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, src, dst)
	Element-wise activation for floating point types. More...

#define	relu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) (max((DATA_TYPE)ZERO_POINT, x))

#define	brelu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)ZERO_POINT, x)))

#define	lu_brelu_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))

#define	hard_swish_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) (x * ((min(max((DATA_TYPE)(x + (DATA_TYPE)3.f), (DATA_TYPE)0.f), (DATA_TYPE)6.f)) * (DATA_TYPE)0.166666667f))

#define	identity_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) (x)

#define	ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x)

#define	ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x)

#define	V_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL))

#define	V_SUB(A_VAL, B_VAL) ((A_VAL) - (B_VAL))

#define	V_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL))

#define	V_MUL(A_VAL, B_VAL) ((A_VAL) * (B_VAL))

#define	T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_POINT, A_VAL, B_VAL, src, dst)
	Element-wise activation for quantized types. More...

#define	T_ADD(DATA_TYPE, M0, N0, lhs, rhs, dst)
	Element-wise addition between two tiles. More...

#define	T_ADD_CONSTANT(DATA_TYPE, M0, N0, lhs, rhs_constant, dst)
	Element-wise addition with a constant value. More...

#define	T_ELTWISE_BROADCAST_ADD_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_LHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_RHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_LHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_RHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_LHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_BROADCAST_RHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_SCALE_CONSTANT(DATA_TYPE, M0, N0, lhs, rhs_constant, dst)
	Element-wise scale with a constant value. More...

#define	T_ELTWISE_BROADCAST_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
	Element-wise operation with RHS broadcast (RHS has the X dimension only). More...

#define	T_ELTWISE_BROADCAST_LHS_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
	Element-wise operation with LHS broadcast (LHS has the X dimension only). More...

#define	T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

#define	T_ELTWISE(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
	Element-wise operation between two tiles (LHS and RHS). More...

#define	T_FLOOR(DST_DATA_TYPE, M0, N0, src, dst)
	Floor operation on a tile. More...

#define	T_MMUL(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, LHS_LAYOUT, RHS_LAYOUT, lhs, rhs, dst) T_MMUL_##LHS_LAYOUT##_##RHS_LAYOUT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)
	Matrix multiplication. More...

#define	T_MMUL_NT_T(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_float_float_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_half_half_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_half_half_half(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_char_char_int(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_uchar_uchar_uint(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_uchar_uchar_int(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_NT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_NT_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_NT_float_float_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_NT_half_half_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_NT_half_half_half(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_NT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_NT_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_NT_float_float_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_NT_half_half_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_NT_half_half_half(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_T(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_T_float_float_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_T_half_half_float(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_T_half_half_half(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst) T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

#define	T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Macro Definition Documentation

◆ ACT_OP_QUANTIZED

#define ACT_OP_QUANTIZED	(	op,
		DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x)

Definition at line 1160 of file tile_helpers.h.

◆ ACTIVATION_QUANTIZED

#define ACTIVATION_QUANTIZED	(	op,
		DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_POINT, A_VAL, B_VAL, x)

Definition at line 1161 of file tile_helpers.h.

◆ brelu_op_quantized

#define brelu_op_quantized	(	DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	(min((DATA_TYPE)A_VAL, max((DATA_TYPE)ZERO_POINT, x)))

Definition at line 1152 of file tile_helpers.h.

◆ COPY_16_VECTOR_TO_TILE_COLUMN

#define COPY_16_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR.s0;                      \
        TILE[1].s[COLUMN] = VECTOR.s1;                      \
        TILE[2].s[COLUMN] = VECTOR.s2;                      \
        TILE[3].s[COLUMN] = VECTOR.s3;                      \
        TILE[4].s[COLUMN] = VECTOR.s4;                      \
        TILE[5].s[COLUMN] = VECTOR.s5;                      \
        TILE[6].s[COLUMN] = VECTOR.s6;                      \
        TILE[7].s[COLUMN] = VECTOR.s7;                      \
        TILE[8].s[COLUMN] = VECTOR.s8;                      \
        TILE[9].s[COLUMN] = VECTOR.s9;                      \
        TILE[10].s[COLUMN] = VECTOR.sA;                     \
        TILE[11].s[COLUMN] = VECTOR.sB;                     \
        TILE[12].s[COLUMN] = VECTOR.sC;                     \
        TILE[13].s[COLUMN] = VECTOR.sD;                     \
        TILE[14].s[COLUMN] = VECTOR.sE;                     \
        TILE[15].s[COLUMN] = VECTOR.sF;                     \
    })

Definition at line 589 of file tile_helpers.h.

◆ COPY_1_VECTOR_TO_TILE_COLUMN

#define COPY_1_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR;                         \
    })

Definition at line 551 of file tile_helpers.h.

◆ COPY_2_VECTOR_TO_TILE_COLUMN

#define COPY_2_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR.s0;                      \
        TILE[1].s[COLUMN] = VECTOR.s1;                      \
    })

Definition at line 556 of file tile_helpers.h.

◆ COPY_3_VECTOR_TO_TILE_COLUMN

#define COPY_3_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR.s0;                      \
        TILE[1].s[COLUMN] = VECTOR.s1;                      \
        TILE[2].s[COLUMN] = VECTOR.s2;                      \
    })

Definition at line 562 of file tile_helpers.h.

◆ COPY_4_VECTOR_TO_TILE_COLUMN

#define COPY_4_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR.s0;                      \
        TILE[1].s[COLUMN] = VECTOR.s1;                      \
        TILE[2].s[COLUMN] = VECTOR.s2;                      \
        TILE[3].s[COLUMN] = VECTOR.s3;                      \
    })

Definition at line 569 of file tile_helpers.h.

◆ COPY_8_VECTOR_TO_TILE_COLUMN

#define COPY_8_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		COLUMN
	)

Value:

    ({                                                      \
        TILE[0].s[COLUMN] = VECTOR.s0;                      \
        TILE[1].s[COLUMN] = VECTOR.s1;                      \
        TILE[2].s[COLUMN] = VECTOR.s2;                      \
        TILE[3].s[COLUMN] = VECTOR.s3;                      \
        TILE[4].s[COLUMN] = VECTOR.s4;                      \
        TILE[5].s[COLUMN] = VECTOR.s5;                      \
        TILE[6].s[COLUMN] = VECTOR.s6;                      \
        TILE[7].s[COLUMN] = VECTOR.s7;                      \
    })

Definition at line 577 of file tile_helpers.h.

◆ COPY_VECTOR_TO_TILE_COLUMN

#define COPY_VECTOR_TO_TILE_COLUMN	(	VECTOR,
		TILE,
		WIDTH,
		COLUMN
	)	COPY_VECTOR_TO_TILE_COLUMN_STR(VECTOR, TILE, WIDTH, COLUMN)

Store a VECTOR variable (e.g. int4, int8, char2 etc.) to a specified column in the TILE object.

Parameters

[in]	VECTOR	Vector variable to store
[in,out]	TILE	Tile variable to store to
[in]	WIDTH	Width of the vector variable, also height of the tile (e.g. 2 if char2)
[in]	COLUMN	Column index of the tile

Definition at line 549 of file tile_helpers.h.

◆ COPY_VECTOR_TO_TILE_COLUMN_STR

#define COPY_VECTOR_TO_TILE_COLUMN_STR	(	VECTOR,
		TILE,
		WIDTH,
		COLUMN
	)	COPY_##WIDTH##_VECTOR_TO_TILE_COLUMN(VECTOR, TILE, COLUMN)

Definition at line 550 of file tile_helpers.h.

◆ DOT_PRODUCT10_INTEGER8

#define DOT_PRODUCT10_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89), ((b).s89), c);     \
    })

Definition at line 428 of file tile_helpers.h.

◆ DOT_PRODUCT11_INTEGER8

#define DOT_PRODUCT11_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89A), ((b).s89A), c);     \
    })

Definition at line 433 of file tile_helpers.h.

◆ DOT_PRODUCT12_INTEGER8

#define DOT_PRODUCT12_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89AB), ((b).s89AB), c);     \
    })

Definition at line 438 of file tile_helpers.h.

◆ DOT_PRODUCT13_INTEGER8

#define DOT_PRODUCT13_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89AB), ((b).s89AB), c);     \
        DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).sC), ((b).sC), c);     \
    })

Definition at line 443 of file tile_helpers.h.

◆ DOT_PRODUCT14_INTEGER8

#define DOT_PRODUCT14_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89AB), ((b).s89AB), c);     \
        DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).sCD), ((b).sCD), c);     \
    })

Definition at line 449 of file tile_helpers.h.

◆ DOT_PRODUCT15_INTEGER8

#define DOT_PRODUCT15_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s89AB), ((b).s89AB), c);     \
        DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).sCDE), ((b).sCDE), c);     \
    })

Definition at line 455 of file tile_helpers.h.

◆ DOT_PRODUCT16_INTEGER8

#define DOT_PRODUCT16_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                 \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).lo), ((b).lo), c);      \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).hi), ((b).hi), c);      \
    })

Definition at line 461 of file tile_helpers.h.

◆ DOT_PRODUCT1_INTEGER8

#define DOT_PRODUCT1_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        c += (C_DATA_TYPE)(a) * (C_DATA_TYPE)(b);     \
    })

Definition at line 368 of file tile_helpers.h.

◆ DOT_PRODUCT2_INTEGER8

#define DOT_PRODUCT2_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                  \
        c += (C_DATA_TYPE)(a).s0 * (C_DATA_TYPE)(b).s0; \
        c += (C_DATA_TYPE)(a).s1 * (C_DATA_TYPE)(b).s1; \
    })

Definition at line 385 of file tile_helpers.h.

◆ DOT_PRODUCT3_INTEGER8

#define DOT_PRODUCT3_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                  \
        DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c);  \
        c += (C_DATA_TYPE)(a).s2 * (C_DATA_TYPE)(b).s2; \
    })

Definition at line 390 of file tile_helpers.h.

◆ DOT_PRODUCT4_INTEGER8

#define DOT_PRODUCT4_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		x,
		y,
		val
	)

Value:

    ({                                                    \
        val += (C_DATA_TYPE)(x).s0 * (C_DATA_TYPE)(y).s0; \
        val += (C_DATA_TYPE)(x).s1 * (C_DATA_TYPE)(y).s1; \
        val += (C_DATA_TYPE)(x).s2 * (C_DATA_TYPE)(y).s2; \
        val += (C_DATA_TYPE)(x).s3 * (C_DATA_TYPE)(y).s3; \
    })

Definition at line 395 of file tile_helpers.h.

◆ DOT_PRODUCT5_INTEGER8

#define DOT_PRODUCT5_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c);     \
        DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s4), ((b).s4), c);     \
    })

Definition at line 403 of file tile_helpers.h.

◆ DOT_PRODUCT6_INTEGER8

#define DOT_PRODUCT6_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c);     \
        DOT_PRODUCT2_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s45), ((b).s45), c);     \
    })

Definition at line 408 of file tile_helpers.h.

◆ DOT_PRODUCT7_INTEGER8

#define DOT_PRODUCT7_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s0123), ((b).s0123), c);     \
        DOT_PRODUCT3_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s456), ((b).s456), c);     \
    })

Definition at line 413 of file tile_helpers.h.

◆ DOT_PRODUCT8_INTEGER8

#define DOT_PRODUCT8_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).lo), ((b).lo), c);     \
        DOT_PRODUCT4_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).hi), ((b).hi), c);     \
    })

Definition at line 418 of file tile_helpers.h.

◆ DOT_PRODUCT9_INTEGER8

#define DOT_PRODUCT9_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		a,
		b,
		c
	)

Value:

    ({                                                \
        DOT_PRODUCT8_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s01234567), ((b).s01234567), c);     \
        DOT_PRODUCT1_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, ((a).s8), ((b).s8), c);     \
    })

Definition at line 423 of file tile_helpers.h.

◆ DOT_PRODUCT_INTEGER8

#define DOT_PRODUCT_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		K0,
		a,
		b,
		c
	)	DOT_PRODUCT_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, b, c)

Dot product integer 8-bit function.

Note: Performs: c += dot(a, b)

Parameters

[in]	A_DATA_TYPE	A (lhs) data type
[in]	B_DATA_TYPE	B (rhs) data type
[in]	C_DATA_TYPE	C (accumulator) data type
[in]	K0	Number of accumulations
[in]	a	OpenCL vector a
[in]	b	OpenCL vector b
[in]	c	Scalar variable c

Definition at line 366 of file tile_helpers.h.

◆ DOT_PRODUCT_INTEGER8_STR

#define DOT_PRODUCT_INTEGER8_STR	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		K0,
		a,
		b,
		c
	)	DOT_PRODUCT##K0##_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, a, b, c)

Definition at line 367 of file tile_helpers.h.

◆ GET_SPATIAL_IDX

#define GET_SPATIAL_IDX	(	IDX,
		N0,
		PARTIAL_N0
	)	(max((int)(get_global_id(IDX) * N0 - (N0 - PARTIAL_N0) % N0), 0))

Get the get_global_id with partial N0.

This function is useful when the dimension is not a multiple of N0 and we need to use a partial N0 to avoid out-of-bounds reads/writes.

Note: PARTIAL_N0 is used for get_global_id(IDX) = 0.

Parameters

[in]	IDX	get_global_id index (0,1 and 2 only)
[in]	N0	Number of elements read/written on the IDX direction
[in]	PARTIAL_N0	Number of elements read/written on the IDX direction for get_global_id(IDX) = 0. If zero, N0 elements are read/written for get_global_id(IDX) = 0

Definition at line 352 of file tile_helpers.h.

◆ hard_swish_op_quantized

#define hard_swish_op_quantized	(	DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	(x * ((min(max((DATA_TYPE)(x + (DATA_TYPE)3.f), (DATA_TYPE)0.f), (DATA_TYPE)6.f)) * (DATA_TYPE)0.166666667f))

Definition at line 1156 of file tile_helpers.h.

◆ identity_op_quantized

#define identity_op_quantized	(	DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	(x)

Definition at line 1158 of file tile_helpers.h.

◆ LOOP_UNROLLING

#define LOOP_UNROLLING	(	type,
		idx,
		start,
		step,
		num,
		macro
	)	LOOP_UNROLLING_STR(type, idx, start, step, num, macro)

Definition at line 340 of file tile_helpers.h.

◆ LOOP_UNROLLING_1

#define LOOP_UNROLLING_1	(	idx,
		step,
		macro
	)	(macro)

Definition at line 196 of file tile_helpers.h.

◆ LOOP_UNROLLING_10

#define LOOP_UNROLLING_10	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_9(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 205 of file tile_helpers.h.

◆ LOOP_UNROLLING_100

#define LOOP_UNROLLING_100	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_99(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 295 of file tile_helpers.h.

◆ LOOP_UNROLLING_101

#define LOOP_UNROLLING_101	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_100(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 296 of file tile_helpers.h.

◆ LOOP_UNROLLING_102

#define LOOP_UNROLLING_102	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_101(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 297 of file tile_helpers.h.

◆ LOOP_UNROLLING_103

#define LOOP_UNROLLING_103	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_102(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 298 of file tile_helpers.h.

◆ LOOP_UNROLLING_104

#define LOOP_UNROLLING_104	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_103(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 299 of file tile_helpers.h.

◆ LOOP_UNROLLING_105

#define LOOP_UNROLLING_105	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_104(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 300 of file tile_helpers.h.

◆ LOOP_UNROLLING_106

#define LOOP_UNROLLING_106	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_105(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 301 of file tile_helpers.h.

◆ LOOP_UNROLLING_107

#define LOOP_UNROLLING_107	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_106(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 302 of file tile_helpers.h.

◆ LOOP_UNROLLING_108

#define LOOP_UNROLLING_108	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_107(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 303 of file tile_helpers.h.

◆ LOOP_UNROLLING_109

#define LOOP_UNROLLING_109	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_108(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 304 of file tile_helpers.h.

◆ LOOP_UNROLLING_11

#define LOOP_UNROLLING_11	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_10(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 206 of file tile_helpers.h.

◆ LOOP_UNROLLING_110

#define LOOP_UNROLLING_110	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_109(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 305 of file tile_helpers.h.

◆ LOOP_UNROLLING_111

#define LOOP_UNROLLING_111	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_110(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 306 of file tile_helpers.h.

◆ LOOP_UNROLLING_112

#define LOOP_UNROLLING_112	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_111(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 307 of file tile_helpers.h.

◆ LOOP_UNROLLING_113

#define LOOP_UNROLLING_113	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_112(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 308 of file tile_helpers.h.

◆ LOOP_UNROLLING_114

#define LOOP_UNROLLING_114	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_113(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 309 of file tile_helpers.h.

◆ LOOP_UNROLLING_115

#define LOOP_UNROLLING_115	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_114(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 310 of file tile_helpers.h.

◆ LOOP_UNROLLING_116

#define LOOP_UNROLLING_116	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_115(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 311 of file tile_helpers.h.

◆ LOOP_UNROLLING_117

#define LOOP_UNROLLING_117	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_116(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 312 of file tile_helpers.h.

◆ LOOP_UNROLLING_118

#define LOOP_UNROLLING_118	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_117(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 313 of file tile_helpers.h.

◆ LOOP_UNROLLING_119

#define LOOP_UNROLLING_119	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_118(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 314 of file tile_helpers.h.

◆ LOOP_UNROLLING_12

#define LOOP_UNROLLING_12	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_11(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 207 of file tile_helpers.h.

◆ LOOP_UNROLLING_120

#define LOOP_UNROLLING_120	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_119(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 315 of file tile_helpers.h.

◆ LOOP_UNROLLING_121

#define LOOP_UNROLLING_121	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_120(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 316 of file tile_helpers.h.

◆ LOOP_UNROLLING_122

#define LOOP_UNROLLING_122	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_121(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 317 of file tile_helpers.h.

◆ LOOP_UNROLLING_123

#define LOOP_UNROLLING_123	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_122(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 318 of file tile_helpers.h.

◆ LOOP_UNROLLING_124

#define LOOP_UNROLLING_124	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_123(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 319 of file tile_helpers.h.

◆ LOOP_UNROLLING_125

#define LOOP_UNROLLING_125	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_124(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 320 of file tile_helpers.h.

◆ LOOP_UNROLLING_126

#define LOOP_UNROLLING_126	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_125(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 321 of file tile_helpers.h.

◆ LOOP_UNROLLING_127

#define LOOP_UNROLLING_127	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_126(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 322 of file tile_helpers.h.

◆ LOOP_UNROLLING_128

#define LOOP_UNROLLING_128	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_127(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 323 of file tile_helpers.h.

◆ LOOP_UNROLLING_13

#define LOOP_UNROLLING_13	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_12(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 208 of file tile_helpers.h.

◆ LOOP_UNROLLING_14

#define LOOP_UNROLLING_14	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_13(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 209 of file tile_helpers.h.

◆ LOOP_UNROLLING_15

#define LOOP_UNROLLING_15	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_14(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 210 of file tile_helpers.h.

◆ LOOP_UNROLLING_16

#define LOOP_UNROLLING_16	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_15(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 211 of file tile_helpers.h.

◆ LOOP_UNROLLING_17

#define LOOP_UNROLLING_17	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_16(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 212 of file tile_helpers.h.

◆ LOOP_UNROLLING_18

#define LOOP_UNROLLING_18	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_17(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 213 of file tile_helpers.h.

◆ LOOP_UNROLLING_19

#define LOOP_UNROLLING_19	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_18(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 214 of file tile_helpers.h.

◆ LOOP_UNROLLING_2

#define LOOP_UNROLLING_2	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_1(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 197 of file tile_helpers.h.

◆ LOOP_UNROLLING_20

#define LOOP_UNROLLING_20	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_19(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 215 of file tile_helpers.h.

◆ LOOP_UNROLLING_21

#define LOOP_UNROLLING_21	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_20(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 216 of file tile_helpers.h.

◆ LOOP_UNROLLING_22

#define LOOP_UNROLLING_22	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_21(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 217 of file tile_helpers.h.

◆ LOOP_UNROLLING_23

#define LOOP_UNROLLING_23	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_22(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 218 of file tile_helpers.h.

◆ LOOP_UNROLLING_24

#define LOOP_UNROLLING_24	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_23(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 219 of file tile_helpers.h.

◆ LOOP_UNROLLING_25

#define LOOP_UNROLLING_25	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_24(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 220 of file tile_helpers.h.

◆ LOOP_UNROLLING_26

#define LOOP_UNROLLING_26	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_25(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 221 of file tile_helpers.h.

◆ LOOP_UNROLLING_27

#define LOOP_UNROLLING_27	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_26(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 222 of file tile_helpers.h.

◆ LOOP_UNROLLING_28

#define LOOP_UNROLLING_28	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_27(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 223 of file tile_helpers.h.

◆ LOOP_UNROLLING_29

#define LOOP_UNROLLING_29	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_28(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 224 of file tile_helpers.h.

◆ LOOP_UNROLLING_3

#define LOOP_UNROLLING_3	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_2(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 198 of file tile_helpers.h.

◆ LOOP_UNROLLING_30

#define LOOP_UNROLLING_30	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_29(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 225 of file tile_helpers.h.

◆ LOOP_UNROLLING_31

#define LOOP_UNROLLING_31	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_30(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 226 of file tile_helpers.h.

◆ LOOP_UNROLLING_32

#define LOOP_UNROLLING_32	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_31(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 227 of file tile_helpers.h.

◆ LOOP_UNROLLING_33

#define LOOP_UNROLLING_33	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_32(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 228 of file tile_helpers.h.

◆ LOOP_UNROLLING_34

#define LOOP_UNROLLING_34	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_33(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 229 of file tile_helpers.h.

◆ LOOP_UNROLLING_35

#define LOOP_UNROLLING_35	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_34(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 230 of file tile_helpers.h.

◆ LOOP_UNROLLING_36

#define LOOP_UNROLLING_36	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_35(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 231 of file tile_helpers.h.

◆ LOOP_UNROLLING_37

#define LOOP_UNROLLING_37	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_36(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 232 of file tile_helpers.h.

◆ LOOP_UNROLLING_38

#define LOOP_UNROLLING_38	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_37(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 233 of file tile_helpers.h.

◆ LOOP_UNROLLING_39

#define LOOP_UNROLLING_39	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_38(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 234 of file tile_helpers.h.

◆ LOOP_UNROLLING_4

#define LOOP_UNROLLING_4	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_3(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 199 of file tile_helpers.h.

◆ LOOP_UNROLLING_40

#define LOOP_UNROLLING_40	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_39(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 235 of file tile_helpers.h.

◆ LOOP_UNROLLING_41

#define LOOP_UNROLLING_41	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_40(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 236 of file tile_helpers.h.

◆ LOOP_UNROLLING_42

#define LOOP_UNROLLING_42	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_41(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 237 of file tile_helpers.h.

◆ LOOP_UNROLLING_43

#define LOOP_UNROLLING_43	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_42(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 238 of file tile_helpers.h.

◆ LOOP_UNROLLING_44

#define LOOP_UNROLLING_44	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_43(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 239 of file tile_helpers.h.

◆ LOOP_UNROLLING_45

#define LOOP_UNROLLING_45	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_44(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 240 of file tile_helpers.h.

◆ LOOP_UNROLLING_46

#define LOOP_UNROLLING_46	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_45(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 241 of file tile_helpers.h.

◆ LOOP_UNROLLING_47

#define LOOP_UNROLLING_47	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_46(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 242 of file tile_helpers.h.

◆ LOOP_UNROLLING_48

#define LOOP_UNROLLING_48	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_47(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 243 of file tile_helpers.h.

◆ LOOP_UNROLLING_49

#define LOOP_UNROLLING_49	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_48(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 244 of file tile_helpers.h.

◆ LOOP_UNROLLING_5

#define LOOP_UNROLLING_5	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_4(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 200 of file tile_helpers.h.

◆ LOOP_UNROLLING_50

#define LOOP_UNROLLING_50	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_49(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 245 of file tile_helpers.h.

◆ LOOP_UNROLLING_51

#define LOOP_UNROLLING_51	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_50(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 246 of file tile_helpers.h.

◆ LOOP_UNROLLING_52

#define LOOP_UNROLLING_52	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_51(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 247 of file tile_helpers.h.

◆ LOOP_UNROLLING_53

#define LOOP_UNROLLING_53	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_52(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 248 of file tile_helpers.h.

◆ LOOP_UNROLLING_54

#define LOOP_UNROLLING_54	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_53(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 249 of file tile_helpers.h.

◆ LOOP_UNROLLING_55

#define LOOP_UNROLLING_55	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_54(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 250 of file tile_helpers.h.

◆ LOOP_UNROLLING_56

#define LOOP_UNROLLING_56	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_55(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 251 of file tile_helpers.h.

◆ LOOP_UNROLLING_57

#define LOOP_UNROLLING_57	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_56(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 252 of file tile_helpers.h.

◆ LOOP_UNROLLING_58

#define LOOP_UNROLLING_58	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_57(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 253 of file tile_helpers.h.

◆ LOOP_UNROLLING_59

#define LOOP_UNROLLING_59	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_58(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 254 of file tile_helpers.h.

◆ LOOP_UNROLLING_6

#define LOOP_UNROLLING_6	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_5(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 201 of file tile_helpers.h.

◆ LOOP_UNROLLING_60

#define LOOP_UNROLLING_60	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_59(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 255 of file tile_helpers.h.

◆ LOOP_UNROLLING_61

#define LOOP_UNROLLING_61	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_60(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 256 of file tile_helpers.h.

◆ LOOP_UNROLLING_62

#define LOOP_UNROLLING_62	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_61(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 257 of file tile_helpers.h.

◆ LOOP_UNROLLING_63

#define LOOP_UNROLLING_63	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_62(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 258 of file tile_helpers.h.

◆ LOOP_UNROLLING_64

#define LOOP_UNROLLING_64	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_63(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 259 of file tile_helpers.h.

◆ LOOP_UNROLLING_65

#define LOOP_UNROLLING_65	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_64(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 260 of file tile_helpers.h.

◆ LOOP_UNROLLING_66

#define LOOP_UNROLLING_66	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_65(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 261 of file tile_helpers.h.

◆ LOOP_UNROLLING_67

#define LOOP_UNROLLING_67	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_66(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 262 of file tile_helpers.h.

◆ LOOP_UNROLLING_68

#define LOOP_UNROLLING_68	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_67(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 263 of file tile_helpers.h.

◆ LOOP_UNROLLING_69

#define LOOP_UNROLLING_69	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_68(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 264 of file tile_helpers.h.

◆ LOOP_UNROLLING_7

#define LOOP_UNROLLING_7	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_6(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 202 of file tile_helpers.h.

◆ LOOP_UNROLLING_70

#define LOOP_UNROLLING_70	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_69(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 265 of file tile_helpers.h.

◆ LOOP_UNROLLING_71

#define LOOP_UNROLLING_71	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_70(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 266 of file tile_helpers.h.

◆ LOOP_UNROLLING_72

#define LOOP_UNROLLING_72	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_71(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 267 of file tile_helpers.h.

◆ LOOP_UNROLLING_73

#define LOOP_UNROLLING_73	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_72(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 268 of file tile_helpers.h.

◆ LOOP_UNROLLING_74

#define LOOP_UNROLLING_74	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_73(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 269 of file tile_helpers.h.

◆ LOOP_UNROLLING_75

#define LOOP_UNROLLING_75	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_74(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 270 of file tile_helpers.h.

◆ LOOP_UNROLLING_76

#define LOOP_UNROLLING_76	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_75(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 271 of file tile_helpers.h.

◆ LOOP_UNROLLING_77

#define LOOP_UNROLLING_77	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_76(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 272 of file tile_helpers.h.

◆ LOOP_UNROLLING_78

#define LOOP_UNROLLING_78	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_77(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 273 of file tile_helpers.h.

◆ LOOP_UNROLLING_79

#define LOOP_UNROLLING_79	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_78(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 274 of file tile_helpers.h.

◆ LOOP_UNROLLING_8

#define LOOP_UNROLLING_8	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_7(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 203 of file tile_helpers.h.

◆ LOOP_UNROLLING_80

#define LOOP_UNROLLING_80	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_79(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 275 of file tile_helpers.h.

◆ LOOP_UNROLLING_81

#define LOOP_UNROLLING_81	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_80(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 276 of file tile_helpers.h.

◆ LOOP_UNROLLING_82

#define LOOP_UNROLLING_82	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_81(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 277 of file tile_helpers.h.

◆ LOOP_UNROLLING_83

#define LOOP_UNROLLING_83	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_82(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 278 of file tile_helpers.h.

◆ LOOP_UNROLLING_84

#define LOOP_UNROLLING_84	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_83(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 279 of file tile_helpers.h.

◆ LOOP_UNROLLING_85

#define LOOP_UNROLLING_85	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_84(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 280 of file tile_helpers.h.

◆ LOOP_UNROLLING_86

#define LOOP_UNROLLING_86	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_85(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 281 of file tile_helpers.h.

◆ LOOP_UNROLLING_87

#define LOOP_UNROLLING_87	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_86(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 282 of file tile_helpers.h.

◆ LOOP_UNROLLING_88

#define LOOP_UNROLLING_88	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_87(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 283 of file tile_helpers.h.

◆ LOOP_UNROLLING_89

#define LOOP_UNROLLING_89	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_88(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 284 of file tile_helpers.h.

◆ LOOP_UNROLLING_9

#define LOOP_UNROLLING_9	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_8(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 204 of file tile_helpers.h.

◆ LOOP_UNROLLING_90

#define LOOP_UNROLLING_90	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_89(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 285 of file tile_helpers.h.

◆ LOOP_UNROLLING_91

#define LOOP_UNROLLING_91	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_90(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 286 of file tile_helpers.h.

◆ LOOP_UNROLLING_92

#define LOOP_UNROLLING_92	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_91(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 287 of file tile_helpers.h.

◆ LOOP_UNROLLING_93

#define LOOP_UNROLLING_93	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_92(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 288 of file tile_helpers.h.

◆ LOOP_UNROLLING_94

#define LOOP_UNROLLING_94	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_93(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 289 of file tile_helpers.h.

◆ LOOP_UNROLLING_95

#define LOOP_UNROLLING_95	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_94(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 290 of file tile_helpers.h.

◆ LOOP_UNROLLING_96

#define LOOP_UNROLLING_96	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_95(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 291 of file tile_helpers.h.

◆ LOOP_UNROLLING_97

#define LOOP_UNROLLING_97	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_96(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 292 of file tile_helpers.h.

◆ LOOP_UNROLLING_98

#define LOOP_UNROLLING_98	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_97(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 293 of file tile_helpers.h.

◆ LOOP_UNROLLING_99

#define LOOP_UNROLLING_99	(	idx,
		step,
		macro
	)	LOOP_UNROLLING_98(idx, step, macro); UNROLL_INCR(idx, step, macro)

Definition at line 294 of file tile_helpers.h.

◆ LOOP_UNROLLING_STR

#define LOOP_UNROLLING_STR	(	type,
		idx,
		start,
		step,
		num,
		macro
	)

Value:

    {                                                          \
        type idx = start;                                      \
        LOOP_UNROLLING_##num(idx, step, macro);                \
    }

Definition at line 325 of file tile_helpers.h.

◆ lu_brelu_op_quantized

#define lu_brelu_op_quantized	(	DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	(min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))

Definition at line 1154 of file tile_helpers.h.

◆ REDUCE_INTEGER8

#define REDUCE_INTEGER8	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		K0,
		a,
		c
	)	REDUCE_INTEGER8_STR(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, c)

Dot product integer 8bit function.

Note: Performs: c += dot(a, 1), i.e. the second dot-product operand is a vector of ones, so the elements of a are summed into c

Parameters

[in]	A_DATA_TYPE	A (lhs) data type
[in]	B_DATA_TYPE	B (rhs) data type
[in]	C_DATA_TYPE	C (accumulator) data type
[in]	K0	Number of accumulations
[in]	a	OpenCL vector a
[in]	c	Scalar variable c

Definition at line 478 of file tile_helpers.h.

◆ REDUCE_INTEGER8_STR

#define REDUCE_INTEGER8_STR	(	A_DATA_TYPE,
		B_DATA_TYPE,
		C_DATA_TYPE,
		K0,
		a,
		c
	)	DOT_PRODUCT_INTEGER8(A_DATA_TYPE, B_DATA_TYPE, C_DATA_TYPE, K0, a, (TILE_VECTOR_TYPE##K0(B_DATA_TYPE))1, c)

Definition at line 479 of file tile_helpers.h.

◆ relu_op_quantized

#define relu_op_quantized	(	DATA_TYPE,
		VEC_SIZE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		x
	)	(max((DATA_TYPE)ZERO_POINT, x))

Definition at line 1150 of file tile_helpers.h.

◆ T_ACTIVATION

#define T_ACTIVATION	(	DATA_TYPE,
		M0,
		N0,
		ACTIVATION_TYPE,
		A_VAL,
		B_VAL,
		src,
		dst
	)

Value:

    ({                                                                                         \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                                                     \
        {                                                                                      \
            dst[_m0].v = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, N0, src[_m0].v, A_VAL, B_VAL); \
        })                                                                                     \
    })

Element-wise activation for floating point types.

Note: Performs: activation(SRC) = DST

Parameters

[in]	DATA_TYPE	SRC/DST data type
[in]	M0	Number of SRC/DST rows
[in]	N0	Number of SRC/DST columns
[in]	ACTIVATION_TYPE	Activation type
[in]	A_VAL	A value used for the activation (e.g. tanh_op, brelu,..)
[in]	B_VAL	B value used for the activation (e.g. tanh_op, brelu,..)
[in]	src	SRC tile
[out]	dst	DST tile

Definition at line 1139 of file tile_helpers.h.

◆ T_ACTIVATION_QUANTIZED

#define T_ACTIVATION_QUANTIZED	(	DATA_TYPE,
		M0,
		N0,
		ACTIVATION_TYPE,
		ZERO_POINT,
		A_VAL,
		B_VAL,
		src,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _m0, 0, 1, M0, \
        { \
            dst[_m0].v = ACTIVATION_QUANTIZED(ACTIVATION_TYPE, DATA_TYPE, N0, ZERO_POINT, A_VAL, B_VAL, src[_m0].v); \
        })                                                                                          \
    })

Element-wise activation for quantized types.

Note: Performs: activation(SRC) = DST

Parameters

[in]	DATA_TYPE	SRC/DST data type
[in]	M0	Number of SRC/DST rows
[in]	N0	Number of SRC/DST columns
[in]	ACTIVATION_TYPE	Activation type
[in]	ZERO_POINT	The zero value to consider in the computation
[in]	A_VAL	Quantized A value used for the activation (e.g. tanh_op, brelu,..)
[in]	B_VAL	Quantized B value used for the activation (e.g. tanh_op, brelu,..)
[in]	src	SRC tile
[out]	dst	DST tile

Definition at line 1182 of file tile_helpers.h.

◆ T_ADD

#define T_ADD	(	DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)

Value:

    ({                                                            \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                        \
        {                                                         \
            dst[_m0].v = lhs[_m0].v + rhs[_m0].v; \
        })                                                        \
    })

Element-wise addition between two tiles.

Note: Performs: LHS + RHS = DST

Parameters

[in]	DATA_TYPE	LHS/RHS/DST data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[out]	dst	DST tile

Definition at line 1201 of file tile_helpers.h.

◆ T_ADD_CONSTANT

#define T_ADD_CONSTANT	(	DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs_constant,
		dst
	)

Value:

    ({                                                            \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                        \
        {                                                         \
            dst[_m0].v = lhs[_m0].v + (DATA_TYPE)rhs_constant;               \
        })                                                        \
    })

Element-wise addition with a constant value.

Note: Performs: LHS + constant = DST

Parameters

[in]	DATA_TYPE	LHS/RHS/DST data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	lhs	LHS tile
[in]	rhs_constant	Constant value
[out]	dst	DST tile

Definition at line 1220 of file tile_helpers.h.

◆ T_ELTWISE

#define T_ELTWISE	(	T_ELWISE_OP,
		DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)

Value:

    ({                                                      \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                  \
        {                                                   \
            dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)));             \
        })                                                  \
    })

Element-wise operation between two tiles (LHS and RHS)

Note: Performs: LHS OP RHS = DST; Both tiles must have same data type

Parameters

[in]	T_ELWISE_OP	Elementwise operator to perform
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[out]	dst	DST tile

Definition at line 1319 of file tile_helpers.h.

◆ T_ELTWISE_ADD

#define T_ELTWISE_ADD	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1301 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_ADD_X

#define T_ELTWISE_BROADCAST_ADD_X	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1228 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_DIV_X

#define T_ELTWISE_BROADCAST_DIV_X	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1235 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_LHS_X

#define T_ELTWISE_BROADCAST_LHS_X	(	T_ELWISE_OP,
		DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)

Value:

    ({                                                      \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                  \
        {                                                   \
            dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)));             \
        })                                                  \
    })

Element-wise operation with LHS broadcasted (LHS has the X dimension only)

Note: Performs: LHS[broadcasted] OP RHS = DST; Both tiles must have same data type

Parameters

[in]	T_ELWISE_OP	Elementwise operator to perform
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of RHS rows
[in]	N0	Number of RHS columns
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[out]	dst	DST tile

Definition at line 1293 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_LHS_X_ADD

#define T_ELTWISE_BROADCAST_LHS_X_ADD	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_LHS_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1229 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_LHS_X_MUL

#define T_ELTWISE_BROADCAST_LHS_X_MUL	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1237 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_LHS_X_SUB

#define T_ELTWISE_BROADCAST_LHS_X_SUB	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_LHS_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1232 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_RHS_X_ADD

#define T_ELTWISE_BROADCAST_RHS_X_ADD	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1230 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_RHS_X_MUL

#define T_ELTWISE_BROADCAST_RHS_X_MUL	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1238 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_RHS_X_SUB

#define T_ELTWISE_BROADCAST_RHS_X_SUB	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE_BROADCAST_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1233 of file tile_helpers.h.

◆ T_ELTWISE_BROADCAST_X

#define T_ELTWISE_BROADCAST_X	(	T_ELWISE_OP,
		DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)

Value:

    ({                                                      \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                  \
        {                                                   \
            dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)));             \
        })                                                  \
    })

Element-wise operation with RHS broadcasted (RHS has the X dimension only)

Note: Performs: LHS OP RHS[broadcasted] = DST; Both tiles must have same data type

Parameters

[in]	T_ELWISE_OP	Elementwise operator to perform
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[out]	dst	DST tile

Definition at line 1272 of file tile_helpers.h.

◆ T_ELTWISE_DIV

#define T_ELTWISE_DIV	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1303 of file tile_helpers.h.

◆ T_ELTWISE_MUL

#define T_ELTWISE_MUL	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1304 of file tile_helpers.h.

◆ T_ELTWISE_SUB

#define T_ELTWISE_SUB	(	DST_DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs,
		dst
	)	T_ELTWISE(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

Definition at line 1302 of file tile_helpers.h.

◆ T_FLOOR

#define T_FLOOR	(	DST_DATA_TYPE,
		M0,
		N0,
		src,
		dst
	)

Value:

    ({                                                      \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                  \
        {                                                   \
            dst[_m0].v = floor(CONVERT(src[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)));             \
        })                                                  \
    })

Floor operation on a tile.

Note: Performs: floor(SRC) = DST; Both tiles must have same data type

Parameters

[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of SRC rows
[in]	N0	Number of SRC columns
[in]	src	SRC tile
[out]	dst	DST tile

Definition at line 1338 of file tile_helpers.h.

◆ T_LOAD

#define T_LOAD	(	DATA_TYPE,
		HEIGHT,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		YI_MULTIPLIER,
		STRIDE_Y,
		dst
	)

Value:

    ({                                                                                                                 \
        LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                                          \
        {                                                                                                              \
            dst[_i].v = V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, ((Y) + _i * (int)(YI_MULTIPLIER)), STRIDE_Y); \
        })                                                                                                             \
    })

Load a tile from global memory (tensor)

Parameters

[in]	DATA_TYPE	Data type
[in]	HEIGHT	Number of dst rows
[in]	WIDTH	Number of dst columns
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	Y	Starting Y position
[in]	YI_MULTIPLIER	Parameter used to multiply the internal row increment (_i). In common cases should be 1 but it becomes useful when we want to load rows which are multiple of STRIDE_Y. (e.g. loading the weights of convolution layer). In this case the address calculation is performed as: (Y + _i * YI_MULTIPLIER) * STRIDE_Y
[in]	STRIDE_Y	Stride Y (in bytes) used to load each row.
[out]	dst	Output tile

Definition at line 534 of file tile_helpers.h.

◆ T_LOAD2D_INDIRECT

#define T_LOAD2D_INDIRECT	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		C,
		STRIDE_Y,
		yi,
		dst
	)	T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)

Load a tile from global memory (tensor) using an indirect buffer for the Y coordinates.

Parameters

[in]	DATA_TYPE	Data type
[in]	TILE_AREA	Number of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
[in]	TILE_CHANNELS	Number of elements to load from C (channel) dimension
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). When TENSOR_TYPE=IMAGE, the if condition for the out-of-bound check can be skipped. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	C	Starting C index
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	yi	A tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Y coordinate. 16 is the maximum indirect buffer size.
[out]	dst	Output tile

Definition at line 823 of file tile_helpers.h.

◆ T_LOAD2D_INDIRECT_BUFFER

#define T_LOAD2D_INDIRECT_BUFFER	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		C,
		STRIDE_Y,
		yi,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
        { \
            if(yi[0].s[_i] >= 0) \
            { \
                dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
            } \
        }) \
    })

Definition at line 825 of file tile_helpers.h.

◆ T_LOAD2D_INDIRECT_IMAGE

#define T_LOAD2D_INDIRECT_IMAGE	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		C,
		STRIDE_Y,
		yi,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
        { \
            dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
        }) \
    })

Definition at line 836 of file tile_helpers.h.

◆ T_LOAD2D_INDIRECT_STR

#define T_LOAD2D_INDIRECT_STR	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		C,
		STRIDE_Y,
		yi,
		dst
	)	T_LOAD2D_INDIRECT_##TENSOR_TYPE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)

Definition at line 824 of file tile_helpers.h.

◆ T_LOAD_INDIRECT

#define T_LOAD_INDIRECT	(	DATA_TYPE,
		HEIGHT,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		STRIDE_Y,
		indirect_y,
		dst
	)

Value:

    ({                                                                                                  \
        LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                           \
        {                                                                                               \
            dst[_i].v = V_LOAD(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, (indirect_y[_i].v), STRIDE_Y); \
        })                                                                                              \
    })

Load a tile from global memory (tensor) using an indirect Y index tile.

Parameters

[in]	DATA_TYPE	Data type
[in]	HEIGHT	Number of dst rows
[in]	WIDTH	Number of dst columns
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently BUFFER only is supported. In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	indirect_y	Indirect Y index tile
[out]	dst	Output tile

Definition at line 649 of file tile_helpers.h.

◆ T_LOAD_INDIRECT_WIDTH_SELECT

#define T_LOAD_INDIRECT_WIDTH_SELECT	(	DATA_TYPE,
		HEIGHT,
		WIDTH0,
		WIDTH1,
		TENSOR_TYPE,
		TENSOR,
		X,
		STRIDE_Y,
		WIDTH1_CONDITION,
		dst,
		indirect_y
	)

Value:

    ({                                                                                                                                                                                             \
        if(WIDTH1_CONDITION)                                                                                                                                                                       \
        {                                                                                                                                                                                          \
            LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                                                                                                                  \
            {                                                                                                                                                                                      \
                VLOAD_PARTIAL(WIDTH0, WIDTH1)                                                         \
                (dst[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y));               \
            })                                                                                                                                                                                     \
        }                                                                                                                                                                                          \
        else                                                                                                                                                                                       \
        {                                                                                                                                                                                          \
            LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                                                                                                                  \
            {                                                                                                                                                                                      \
                dst[HEIGHT - 1 - _i].v = V_LOAD(DATA_TYPE, WIDTH0, TENSOR_TYPE, TENSOR, X, (indirect_y[HEIGHT - 1 - _i].v), STRIDE_Y); \
            })                                                                                                                                                                                     \
        }                                                                                                                                                                                          \
    })

Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load.

Note: If WIDTH1_CONDITION is true, the load will use the WIDTH1 length instead of WIDTH0; The rows are loaded in reverse order (from row HEIGHT - 1 down to row 0) so the invalid rows are overwritten by the valid ones

Parameters

[in]	DATA_TYPE	Data type
[in]	HEIGHT	Number of dst rows
[in]	WIDTH0	Load width to use if WIDTH1_CONDITION = false
[in]	WIDTH1	Load width to use if WIDTH1_CONDITION = true
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	STRIDE_Y	Stride Y (in bytes) used to load each row.
[in]	WIDTH1_CONDITION	Condition to select the WIDTH1 load
[out]	dst	Output tile
[in]	indirect_y	Indirect Y index tile

Definition at line 675 of file tile_helpers.h.

◆ T_LOAD_NDHWC_INDIRECT

#define T_LOAD_NDHWC_INDIRECT	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		B,
		Z,
		Y,
		X,
		C,
		TENSOR_WIDTH,
		TENSOR_HEIGHT,
		TENSOR_DEPTH,
		STRIDE_Y,
		xi,
		yi,
		zi,
		dst
	)

Value:

    ({                                                                                                                                                                \
        LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA,                                                                                                                      \
        {                                                                                                                                                             \
            int _src_y = (X) + xi[_i].v + ((Y) + yi[_i].v) * (TENSOR_WIDTH) + ((Z) + zi[_i].v) * (TENSOR_WIDTH * TENSOR_HEIGHT);                                      \
            _src_y += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT) * (int)(TENSOR_DEPTH);                                                                         \
            int _src_valid_y = (((X) + xi[_i].v) >= 0 && ((X) + xi[_i].v) < (int)(TENSOR_WIDTH) && ((Y) + yi[_i].v) >= 0 && ((Y) + yi[_i].v) < (int)(TENSOR_HEIGHT)   \
                             && ((Z) + zi[_i].v) >= 0 && ((Z) + zi[_i].v) < (int)(TENSOR_DEPTH));                                                                     \
            if(_src_valid_y != 0)                                                                                                                                     \
            {                                                                                                                                                         \
                dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y);                                                               \
            }                                                                                                                                                         \
        })                                                                                                                                                            \
    })

Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates.

Parameters

[in]	DATA_TYPE	Data type
[in]	TILE_AREA	Number of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
[in]	TILE_CHANNELS	Number of elements to load from C (channel) dimension
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently BUFFER only is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	B	Starting batch index
[in]	Z	Starting Z index
[in]	Y	Starting Y index
[in]	X	Starting X index
[in]	C	Starting C index
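[in]	TENSOR_WIDTH	Size of the tensor along the X (width) dimension, used for the out-of-bound check and the row index calculation
[in]	TENSOR_HEIGHT	Size of the tensor along the Y (height) dimension, used for the out-of-bound check and the row index calculation
[in]	TENSOR_DEPTH	Size of the tensor along the Z (depth) dimension, used for the out-of-bound check and the batch offset calculation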
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	xi	A tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect X coordinate
[in]	yi	A tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Y coordinate
[in]	zi	A tile with (TILE_WIDTH x TILE_HEIGHT) values with the indirect Z coordinate
[out]	dst	Output tile

Definition at line 866 of file tile_helpers.h.

◆ T_LOAD_NHWC

#define T_LOAD_NHWC	(	DATA_TYPE,
		TILE_HEIGHT,
		TILE_WIDTH,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		B,
		Y,
		X,
		C,
		TENSOR_WIDTH,
		TENSOR_HEIGHT,
		STRIDE_Y,
		dst
	)

Value:

    ({                                                                                                                                                \
        LOOP_UNROLLING(int, _yk, 0, 1, TILE_HEIGHT,                                                                                                   \
        {                                                                                                                                             \
            LOOP_UNROLLING(int, _xk, 0, 1, TILE_WIDTH,                                                                                                \
            {                                                                                                                                         \
                int _src_y = (X) + _xk + ((Y) + _yk) * (TENSOR_WIDTH);                                                                                \
                _src_y    += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT);                                                                        \
                int _src_valid_y = (((X) + _xk) >= 0 && ((X) + _xk) < (int)(TENSOR_WIDTH) && ((Y) + _yk) >= 0 && ((Y) + _yk) < (int)(TENSOR_HEIGHT)); \
                if(_src_valid_y != 0)                                                                                                                 \
                {                                                                                                                                     \
                    dst[_xk + _yk * (TILE_WIDTH)].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y);                     \
                }                                                                                                                                     \
            })                                                                                                                                        \
        })                                                                                                                                            \
    })

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout.

Parameters

[in]	DATA_TYPE	Data type
[in]	TILE_HEIGHT	Number of elements to load from Y (height) dimension
[in]	TILE_WIDTH	Number of elements to load from X (width) dimension
[in]	TILE_CHANNELS	Number of elements to load from C (channel) dimension
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	B	Starting batch index
[in]	Y	Starting Y index
[in]	X	Starting X index
[in]	C	Starting C index
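[in]	TENSOR_HEIGHT	Height of the tensor (Y dimension). Used for the out-of-bounds check on Y and for the batch offset
[in]	TENSOR_WIDTH	Width of the tensor (X dimension). Used for the out-of-bounds check on X and for the row and batch offsets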
[in]	STRIDE_Y	Stride Y (in bytes)
[out]	dst	Output tile

Definition at line 711 of file tile_helpers.h.

◆ T_LOAD_NHWC_INDIRECT

#define T_LOAD_NHWC_INDIRECT	(	DATA_TYPE,
		TILE_AREA,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		B,
		Y,
		X,
		C,
		TENSOR_WIDTH,
		TENSOR_HEIGHT,
		STRIDE_Y,
		xi,
		yi,
		dst
	)

Value:

    ({                                                                                                                                                                \
        LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA,                                                                                                                      \
        {                                                                                                                                                             \
            int _src_y = (X) + xi[_i].v + ((Y) + yi[_i].v) * (TENSOR_WIDTH);                                                                                          \
            _src_y += (B) * (int)(TENSOR_WIDTH) * (int)(TENSOR_HEIGHT);                                                                                               \
            int _src_valid_y = (((X) + xi[_i].v) >= 0 && ((X) + xi[_i].v) < (int)(TENSOR_WIDTH) && ((Y) + yi[_i].v) >= 0 && ((Y) + yi[_i].v) < (int)(TENSOR_HEIGHT)); \
            if(_src_valid_y != 0)                                                                                                                                     \
            {                                                                                                                                                         \
                dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, _src_y, STRIDE_Y);                                                               \
            }                                                                                                                                                         \
        })                                                                                                                                                            \
    })

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout using indirect X and Y coordinates.

Parameters

[in]	DATA_TYPE	Data type
[in]	TILE_AREA	Number of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
[in]	TILE_CHANNELS	Number of elements to load from C (channel) dimension
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	B	Starting batch index
[in]	Y	Starting Y index
[in]	X	Starting X index
[in]	C	Starting C index
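[in]	TENSOR_WIDTH	Width of the tensor (X dimension). Used for the out-of-bounds check on X and for the row and batch offsets
[in]	TENSOR_HEIGHT	Height of the tensor (Y dimension). Used for the out-of-bounds check on Y and for the batch offset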
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	xi	A tile with TILE_AREA (TILE_WIDTH x TILE_HEIGHT) values holding the indirect X coordinates, added to X
[in]	yi	A tile with TILE_AREA (TILE_WIDTH x TILE_HEIGHT) values holding the indirect Y coordinates, added to Y
[out]	dst	Output tile

Definition at line 794 of file tile_helpers.h.

◆ T_LOAD_NHWC_WITH_DILATION

#define T_LOAD_NHWC_WITH_DILATION	(	DATA_TYPE,
		TILE_HEIGHT,
		TILE_WIDTH,
		TILE_CHANNELS,
		TENSOR_TYPE,
		TENSOR,
		B,
		Y,
		X,
		C,
		TENSOR_WIDTH,
		TENSOR_HEIGHT,
		DILATION_X,
		DILATION_Y,
		BOUNDARY_CHECK,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _yk, 0, 1, TILE_HEIGHT, \
        { \
            LOOP_UNROLLING(int, _xk, 0, 1, TILE_WIDTH, \
            { \
                int _src_y = (X) + _xk * (DILATION_X); \
                int _src_z = ((Y) + _yk * (DILATION_Y)); \
                int _src_w    = (B); \
                bool _src_valid_y = (((X) + _xk * (DILATION_X)) >= 0) && (((X) + _xk * (DILATION_X)) < (int)(TENSOR_WIDTH)) && (((Y) + _yk * (DILATION_Y)) >= 0) && (((Y) + _yk * (DILATION_Y)) < (int)(TENSOR_HEIGHT)); \
                if(!(BOUNDARY_CHECK)) \
                { \
                    dst[_xk + _yk * (TILE_WIDTH)].v = VLOAD(TILE_CHANNELS)                                                \
                    (0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (C) * sizeof(DATA_TYPE) + (_src_y) * (TENSOR##_stride_y) + (_src_z) * (TENSOR##_stride_z) + (_src_w) * (TENSOR##_stride_w))); \
                } \
                else \
                { \
                    if(_src_valid_y) \
                    { \
                        dst[_xk + _yk * (TILE_WIDTH)].v = VLOAD(TILE_CHANNELS)                                                \
                    (0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (C) * sizeof(DATA_TYPE) + (_src_y) * (TENSOR##_stride_y) + (_src_z) * (TENSOR##_stride_z) + (_src_w) * (TENSOR##_stride_w))); \
                    }                                                                                                                                                                                                 \
                } \
            })                                                                                                                                                                                                             \
        })                                                                                                                                                                                                             \
    })

Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout with dilation for the X and Y increments.

Parameters

[in]	DATA_TYPE	Data type
[in]	TILE_HEIGHT	Number of elements to load from Y (height) dimension
[in]	TILE_WIDTH	Number of elements to load from X (width) dimension
[in]	TILE_CHANNELS	Number of elements to load from C (channel) dimension
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported. In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	B	Starting batch index
[in]	Y	Starting Y index
[in]	X	Starting X index
[in]	C	Starting C index
[in]	TENSOR_HEIGHT	Height of the tensor (Y dimension). Used for the boundary check on Y
[in]	TENSOR_WIDTH	Width of the tensor (X dimension). Used for the boundary check on X
[in]	DILATION_X	Dilation for the X increment
[in]	DILATION_Y	Dilation for the Y increment
[in]	BOUNDARY_CHECK	Boundary check flag. If true, it checks for any out-of-bound reads
[out]	dst	Output tile

Definition at line 748 of file tile_helpers.h.

◆ T_LOAD_TRANSPOSED

#define T_LOAD_TRANSPOSED	(	DATA_TYPE,
		SRC_HEIGHT,
		SRC_WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		YI_MULTIPLIER,
		STRIDE_Y,
		dst
	)

Value:

    ({                                                                                                                   \
        LOOP_UNROLLING(int, _i, 0, 1, SRC_HEIGHT,                                                                        \
        {                                                                                                                \
            VEC_DATA_TYPE(DATA_TYPE, SRC_WIDTH)                                                                          \
                tmp = V_LOAD(DATA_TYPE, SRC_WIDTH, TENSOR_TYPE, TENSOR, X, ((Y) + _i * (int)(YI_MULTIPLIER)), STRIDE_Y); \
            COPY_VECTOR_TO_TILE_COLUMN(tmp, dst, SRC_WIDTH, _i);                                                         \
        })                                                                                                               \
    })

Load SRC_HEIGHT x SRC_WIDTH elements from global memory (tensor), and store them in a SRC_WIDTH x SRC_HEIGHT tile.

Parameters

[in]	DATA_TYPE	Data type
[in]	SRC_HEIGHT	Number of source rows, or number of columns of the output tile
[in]	SRC_WIDTH	Number of source columns, or number of tile rows
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	Y	Starting Y position
[in]	YI_MULTIPLIER	Parameter used to multiply the internal row increment (_i). In common cases it should be 1, but it becomes useful when we want to load rows that are separated by a multiple of STRIDE_Y (e.g. loading the weights of a convolution layer). In this case the address calculation is performed as: (Y + _i * YI_MULTIPLIER) * STRIDE_Y
[in]	STRIDE_Y	Stride Y (in bytes) used to load each row.
[out]	dst	Output tile

Definition at line 626 of file tile_helpers.h.

◆ T_MMUL

#define T_MMUL	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		LHS_LAYOUT,
		RHS_LAYOUT,
		lhs,
		rhs,
		dst
	)	T_MMUL_##LHS_LAYOUT##_##RHS_LAYOUT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Matrix multiplication.

Note: Performs DST = LHS x RHS + DST (the product is accumulated into the DST tile)

Parameters

[in]	LHS_DATA_TYPE	LHS tile data type
[in]	RHS_DATA_TYPE	RHS tile data type
[in]	DST_DATA_TYPE	DST tile data type
[in]	M0	Number of LHS rows
[in]	N0	Number of RHS columns
[in]	K0	Number of LHS columns
[in]	LHS_LAYOUT	LHS layout (T= transposed, NT= not transposed)
[in]	RHS_LAYOUT	RHS layout (T= transposed, NT= not transposed)
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[in,out]	dst	DST tile

Note: For Int8/UInt8 multiplications, only T_MMUL_NT_T is provided, because we need to multiply the rows of the Lhs and Rhs tensors to utilize the dot product extension. Adding the other variants would require transposing these tile elements on the fly, and is therefore not favored.

Definition at line 1367 of file tile_helpers.h.

◆ T_MMUL_NT_NT

#define T_MMUL_NT_NT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_NT_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1389 of file tile_helpers.h.

◆ T_MMUL_NT_NT_FLOAT

#define T_MMUL_NT_NT_FLOAT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)

Value:

    {                                                                                                                    \
        LOOP_UNROLLING(int, _m, 0, 1, M0,                                                                                \
        {                                                                                                                \
            LOOP_UNROLLING(int, _k, 0, 1, K0,                                                                            \
            {                                                                                                            \
                dst[_m].v = fma((DST_DATA_TYPE)(lhs[_m].s[_k]), (rhs[_k].v), dst[_m].v);                                 \
            })                                                                                                           \
        })                                                                                                               \
    }

Definition at line 1393 of file tile_helpers.h.

◆ T_MMUL_NT_NT_float_float_float

#define T_MMUL_NT_NT_float_float_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1390 of file tile_helpers.h.

◆ T_MMUL_NT_NT_half_half_float

#define T_MMUL_NT_NT_half_half_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1391 of file tile_helpers.h.

◆ T_MMUL_NT_NT_half_half_half

#define T_MMUL_NT_NT_half_half_half	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1392 of file tile_helpers.h.

◆ T_MMUL_NT_T

#define T_MMUL_NT_T	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1368 of file tile_helpers.h.

◆ T_MMUL_NT_T_char_char_int

#define T_MMUL_NT_T_char_char_int	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1372 of file tile_helpers.h.

◆ T_MMUL_NT_T_FLOAT

#define T_MMUL_NT_T_FLOAT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)

Value:

    {                                                                                     \
        LOOP_UNROLLING(int, _m, 0, 1, M0,                                                 \
        {                                                                                 \
            LOOP_UNROLLING(int, _n, 0, 1, N0,                                             \
            {                                                                             \
                LOOP_UNROLLING(int, _k, 0, 1, K0,                                         \
                {                                                                         \
                    dst[_m].s[_n] = fma((DST_DATA_TYPE)(lhs[_m].s[_k]), (DST_DATA_TYPE)(rhs[_n].s[_k]), dst[_m].s[_n]); \
                })                                                                        \
            })                                                                            \
        })                                                                                \
    }

Definition at line 1375 of file tile_helpers.h.

◆ T_MMUL_NT_T_float_float_float

#define T_MMUL_NT_T_float_float_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1369 of file tile_helpers.h.

◆ T_MMUL_NT_T_half_half_float

#define T_MMUL_NT_T_half_half_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1370 of file tile_helpers.h.

◆ T_MMUL_NT_T_half_half_half

#define T_MMUL_NT_T_half_half_half	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1371 of file tile_helpers.h.

◆ T_MMUL_NT_T_INTEGER8

#define T_MMUL_NT_T_INTEGER8	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _m, 0, 1, M0, \
        { \
            LOOP_UNROLLING(int, _n, 0, 1, N0, \
            { \
                DOT_PRODUCT_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, K0, (lhs[_m].v), (rhs[_n].v), dst[_m].s[_n]); \
            })                                                                                             \
        })                                                                                             \
    })

Definition at line 1440 of file tile_helpers.h.

◆ T_MMUL_NT_T_uchar_uchar_int

#define T_MMUL_NT_T_uchar_uchar_int	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1374 of file tile_helpers.h.

◆ T_MMUL_NT_T_uchar_uchar_uint

#define T_MMUL_NT_T_uchar_uchar_uint	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_NT_T_INTEGER8(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1373 of file tile_helpers.h.

◆ T_MMUL_T_NT

#define T_MMUL_T_NT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_NT_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1404 of file tile_helpers.h.

◆ T_MMUL_T_NT_FLOAT

#define T_MMUL_T_NT_FLOAT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)

Value:

    {                                                                                     \
        LOOP_UNROLLING(int, _m, 0, 1, M0,                                                 \
        {                                                                                 \
            LOOP_UNROLLING(int, _n, 0, 1, N0,                                             \
            {                                                                             \
                LOOP_UNROLLING(int, _k, 0, 1, K0,                                         \
                {                                                                         \
                    dst[_m].s[_n] = fma((DST_DATA_TYPE)(lhs[_k].s[_m]), (DST_DATA_TYPE)(rhs[_k].s[_n]), dst[_m].s[_n]); \
                })                                                                        \
            })                                                                            \
        })                                                                                \
    }

Definition at line 1408 of file tile_helpers.h.

◆ T_MMUL_T_NT_float_float_float

#define T_MMUL_T_NT_float_float_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1405 of file tile_helpers.h.

◆ T_MMUL_T_NT_half_half_float

#define T_MMUL_T_NT_half_half_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1406 of file tile_helpers.h.

◆ T_MMUL_T_NT_half_half_half

#define T_MMUL_T_NT_half_half_half	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_NT_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1407 of file tile_helpers.h.

◆ T_MMUL_T_T

#define T_MMUL_T_T	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_T_##LHS_DATA_TYPE##_##RHS_DATA_TYPE##_##DST_DATA_TYPE(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1422 of file tile_helpers.h.

◆ T_MMUL_T_T_FLOAT

#define T_MMUL_T_T_FLOAT	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)

Value:

    {                                                                                     \
        LOOP_UNROLLING(int, _m, 0, 1, M0,                                                 \
        {                                                                                 \
            LOOP_UNROLLING(int, _n, 0, 1, N0,                                             \
            {                                                                             \
                LOOP_UNROLLING(int, _k, 0, 1, K0,                                         \
                {                                                                         \
                    dst[_m].s[_n] = fma((DST_DATA_TYPE)(lhs[_k].s[_m]), (DST_DATA_TYPE)(rhs[_n].s[_k]), dst[_m].s[_n]); \
                })                                                                        \
            })                                                                            \
        })                                                                                \
    }

Definition at line 1426 of file tile_helpers.h.

◆ T_MMUL_T_T_float_float_float

#define T_MMUL_T_T_float_float_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1423 of file tile_helpers.h.

◆ T_MMUL_T_T_half_half_float

#define T_MMUL_T_T_half_half_float	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1424 of file tile_helpers.h.

◆ T_MMUL_T_T_half_half_half

#define T_MMUL_T_T_half_half_half	(	LHS_DATA_TYPE,
		RHS_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		K0,
		lhs,
		rhs,
		dst
	)	T_MMUL_T_T_FLOAT(LHS_DATA_TYPE, RHS_DATA_TYPE, DST_DATA_TYPE, M0, N0, K0, lhs, rhs, dst)

Definition at line 1425 of file tile_helpers.h.

◆ T_OFFSET_CORRECTION

#define T_OFFSET_CORRECTION	(	ACC_DATA_TYPE,
		M0,
		N0,
		K0,
		SRC_OFFSET,
		WEI_OFFSET,
		lhs,
		rhs,
		dst
	)

Value:

    ({                                                                                               \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                                                           \
        {                                                                                            \
            ACC_DATA_TYPE _tm = 0;                                                                   \
            LOOP_UNROLLING(int, _k0, 0, 1, K0,                                                       \
            {                                                                                        \
                _tm += ((ACC_DATA_TYPE)lhs[_m0].s[_k0] * (ACC_DATA_TYPE)WEI_OFFSET);                 \
            })                                                                                       \
            LOOP_UNROLLING(int, _n0, 0, 1, N0,                                                       \
            {                                                                                        \
                dst[_m0].s[_n0] += _tm;                                                              \
                LOOP_UNROLLING(int, _k0, 0, 1, K0,                                                   \
                {                                                                                    \
                    dst[_m0].s[_n0] += ((ACC_DATA_TYPE)rhs[_n0].s[_k0] * (ACC_DATA_TYPE)SRC_OFFSET); \
                })                                                                                   \
            })                                                                                       \
        })                                                                                          \
    })

Offset correction for the QASYMM8 computation.

Parameters

[in]	ACC_DATA_TYPE	Accumulator data type
[in]	M0	Number of src/dst rows
[in]	N0	Number of src/dst columns
[in]	K0	Number of src columns
[in]	SRC_OFFSET	Source quantization offset
[in]	WEI_OFFSET	Weights quantization offset
[in]	lhs	LHS tile
[in]	rhs	RHS tile
[in,out]	dst	DST tile (the offset correction is accumulated into the existing values)

Definition at line 931 of file tile_helpers.h.

◆ T_QUANTIZE8

#define T_QUANTIZE8	(	SRC_DATA_TYPE,
		DST_DATA_TYPE,
		QUANTIZATION_TYPE,
		M0,
		N0,
		DST_OFFSET,
		DST_SHIFT,
		DST_MULTIPLIER,
		src,
		dst_multipliers,
		dst_shifts,
		dst
	)	T_QUANTIZE8_STR(SRC_DATA_TYPE, DST_DATA_TYPE, QUANTIZATION_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)

8-bit quantization with fixed-point scale

Parameters

[in]	SRC_DATA_TYPE	SRC data type
[in]	DST_DATA_TYPE	DST data type
[in]	QUANTIZATION_TYPE	Quantization type (PER_TENSOR or PER_CHANNEL)
[in]	M0	Number of src/dst rows
[in]	N0	Number of src/dst columns
[in]	DST_OFFSET	Quantization offset used for both the per-tensor and per-channel quantization
[in]	DST_SHIFT	Quantization shift for the per-tensor quantization
[in]	DST_MULTIPLIER	Quantization multiplier for the per-tensor quantization
[in]	src	Input tile
[in]	dst_multipliers	Output multipliers tile for the per-channel quantization
[in]	dst_shifts	Output shift tile for the per-channel quantization
[out]	dst	Output tile

Definition at line 966 of file tile_helpers.h.

◆ T_QUANTIZE8_ASYMMETRIC

#define T_QUANTIZE8_ASYMMETRIC	(	SRC_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		DST_OFFSET,
		DST_SHIFT,
		DST_MULTIPLIER,
		src,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _m0, 0, 1, M0, \
        { \
            LOOP_UNROLLING(int, _n0, 0, 1, N0, \
            { \
                SRC_DATA_TYPE _tmp = 0; \
                SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
                _src *= select((SRC_DATA_TYPE)1, ((SRC_DATA_TYPE)1 << (SRC_DATA_TYPE)(-DST_SHIFT)), ((SRC_DATA_TYPE)DST_SHIFT < (SRC_DATA_TYPE)0)); \
                SRC_DATA_TYPE overflow = _src == DST_MULTIPLIER && _src == INT_MIN; \
                long a_64 = (long)(_src); \
                long b_64 = (long)(DST_MULTIPLIER); \
                long ab_64 = a_64 * b_64; \
                long mask1 = 1 << 30; \
                long mask2 = 1 - (1 << 30); \
                long is_positive_or_zero = ab_64 >= 0; \
                long nudge = select(mask2, mask1, is_positive_or_zero); \
                SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
                _tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
                if(DST_SHIFT >= 0) \
                { \
                    long mask = ((((int)1) << DST_SHIFT) - (int)1); \
                    long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
                    _tmp = (_tmp & mask) > threshold ? (_tmp >> DST_SHIFT) + (int)1 : (_tmp >> DST_SHIFT); \
                } \
                _tmp += DST_OFFSET; \
                dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE);                                                                            \
            })                                                                                                                                          \
        })                                                                                                                                          \
    })

Quantize the tile to 8 bits with a fixed-point scale, for asymmetric quantization.

Parameters

[in]	SRC_DATA_TYPE	SRC data type
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of src/dst rows
[in]	N0	Number of src/dst columns
[in]	DST_OFFSET	Quantization offset used for both the per-tensor and per-channel quantization
[in]	DST_SHIFT	Quantization shift for the per-tensor quantization
[in]	DST_MULTIPLIER	Quantization multiplier for the per-tensor quantization
[in]	src	Input tile
[out]	dst	Output tile

Definition at line 1073 of file tile_helpers.h.

◆ T_QUANTIZE8_PER_CHANNEL

#define T_QUANTIZE8_PER_CHANNEL	(	SRC_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		DST_OFFSET,
		DST_SHIFT,
		DST_MULTIPLIER,
		src,
		dst_multipliers,
		dst_shifts,
		dst
	)

8-bit per-channel quantization with fixed-point scale

Parameters

[in]	SRC_DATA_TYPE	SRC data type
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of src/dst rows
[in]	N0	Number of src/dst columns
[in]	DST_OFFSET	Quantization offset
[in]	DST_SHIFT	(unused)
[in]	DST_MULTIPLIER	(unused)
[in]	src	Input tile
[in]	dst_multipliers	Output multipliers tile for the per-channel quantization
[in]	dst_shifts	Output shift tile for the per-channel quantization
[out]	dst	Output tile

Definition at line 1028 of file tile_helpers.h.

◆ T_QUANTIZE8_PER_TENSOR

#define T_QUANTIZE8_PER_TENSOR	(	SRC_DATA_TYPE,
		DST_DATA_TYPE,
		M0,
		N0,
		DST_OFFSET,
		DST_SHIFT,
		DST_MULTIPLIER,
		src,
		dst_multipliers,
		dst_shifts,
		dst
	)

Value:

    ({ \
        LOOP_UNROLLING(int, _m0, 0, 1, M0, \
        { \
            LOOP_UNROLLING(int, _n0, 0, 1, N0, \
            { \
                SRC_DATA_TYPE _tmp = 0; \
                SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
                _src *= select((SRC_DATA_TYPE)1, ((SRC_DATA_TYPE)1 << (SRC_DATA_TYPE)(-DST_SHIFT)), ((SRC_DATA_TYPE)DST_SHIFT < (SRC_DATA_TYPE)0)); \
                SRC_DATA_TYPE overflow = _src == DST_MULTIPLIER && _src == INT_MIN; \
                long a_64 = (long)(_src); \
                long b_64 = (long)(DST_MULTIPLIER); \
                long ab_64 = a_64 * b_64; \
                long mask1 = 1 << 30; \
                long mask2 = 1 - (1 << 30); \
                long is_positive_or_zero = ab_64 >= 0; \
                long nudge = select(mask2, mask1, is_positive_or_zero); \
                SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
                _tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
                if(DST_SHIFT >= 0) \
                { \
                    long mask = ((((int)1) << DST_SHIFT) - (long)1); \
                    long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
                    _tmp = (_tmp & mask) > threshold ? (_tmp >> DST_SHIFT) + (int)1 : (_tmp >> DST_SHIFT); \
                } \
                _tmp += DST_OFFSET; \
                dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE);                                                                            \
            })                                                                                                                                          \
        })                                                                                                                                          \
    })

8-bit per-tensor quantization with fixed-point scale

Parameters

[in]	SRC_DATA_TYPE	SRC data type
[in]	DST_DATA_TYPE	DST data type
[in]	M0	Number of src/dst rows
[in]	N0	Number of src/dst columns
[in]	DST_OFFSET	Quantization offset
[in]	DST_SHIFT	Quantization shift for the per-tensor quantization
[in]	DST_MULTIPLIER	Quantization multiplier for the per-tensor quantization
[in]	src	Input tile
[in]	dst_multipliers	(unused)
[in]	dst_shifts	(unused)
[out]	dst	Output tile

Definition at line 983 of file tile_helpers.h.

◆ T_QUANTIZE8_STR

#define T_QUANTIZE8_STR	(	SRC_DATA_TYPE,
		DST_DATA_TYPE,
		QUANTIZATION_TYPE,
		M0,
		N0,
		DST_OFFSET,
		DST_SHIFT,
		DST_MULTIPLIER,
		src,
		dst_multipliers,
		dst_shifts,
		dst
	)	T_QUANTIZE8_##QUANTIZATION_TYPE(SRC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, src, dst_multipliers, dst_shifts, dst)

Definition at line 967 of file tile_helpers.h.

◆ T_ROWSET_MASK

#define T_ROWSET_MASK	(	DATA_TYPE,
		M0,
		N0,
		VALUE_TO_SET,
		a,
		mask
	)

Value:

    ({                                                                                                                                                     \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                                                                                                                 \
        {                                                                                                                                                  \
            LOOP_UNROLLING(int, _n0, 0, 1, N0,                                                                                                             \
            {                                                                                                                                              \
                a[_m0].s[_n0] = select((DATA_TYPE)(a[_m0].s[_n0]), (DATA_TYPE)(VALUE_TO_SET), (SELECT_DATA_TYPE(DATA_TYPE))(mask[_m0].v == (DATA_TYPE)0)); \
            })                                                                                                                                             \
        })                                                                                                                                                 \
    })

Conditional rowset (memset by row)

Note: Set the row to VALUE_TO_SET if the corresponding mask == 0

Parameters

[in]	DATA_TYPE	Data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	VALUE_TO_SET	Value to set the row
[in,out]	a	Input/output tile
[in]	mask	Mask to check for setting the row to VALUE_TO_SET

Definition at line 1115 of file tile_helpers.h.

◆ T_SCALE_CONSTANT

#define T_SCALE_CONSTANT	(	DATA_TYPE,
		M0,
		N0,
		lhs,
		rhs_constant,
		dst
	)

Value:

    ({                                                            \
        LOOP_UNROLLING(int, _m0, 0, 1, M0,                        \
        {                                                         \
            dst[_m0].v = lhs[_m0].v * (DATA_TYPE)rhs_constant; \
        })                                                        \
    })

Element-wise scale with a constant value.

Note: Performs: LHS * constant = DST

Parameters

[in]	DATA_TYPE	LHS/RHS/DST data type
[in]	M0	Number of LHS rows
[in]	N0	Number of LHS columns
[in]	lhs	LHS tile
[in]	rhs_constant	Constant value
[out]	dst	DST tile

Definition at line 1251 of file tile_helpers.h.

◆ T_STORE_INDIRECT_WIDTH_SELECT

#define T_STORE_INDIRECT_WIDTH_SELECT	(	DATA_TYPE,
		HEIGHT,
		WIDTH0,
		WIDTH1,
		TENSOR_TYPE,
		TENSOR,
		X,
		STRIDE_Y,
		WIDTH1_CONDITION,
		src,
		indirect_y
	)

Value:

    ({                                                                                                                                                                                             \
        if(WIDTH1_CONDITION)                                                                                                                                                                       \
        {                                                                                                                                                                                          \
            LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                                                                                                                  \
            {                                                                                                                                                                                      \
                VSTORE_PARTIAL(WIDTH0, WIDTH1)                                                                                                                                                     \
                (CONVERT(src[HEIGHT - 1 - _i].v, VEC_DATA_TYPE(DATA_TYPE, WIDTH0)), 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
            })                                                                                                                                                                                     \
        }                                                                                                                                                                                          \
        else                                                                                                                                                                                       \
        {                                                                                                                                                                                          \
            LOOP_UNROLLING(int, _i, 0, 1, HEIGHT,                                                                                                                                                  \
            {                                                                                                                                                                                      \
                VSTORE(WIDTH0)                                                                                                                                                                     \
                (CONVERT(src[HEIGHT - 1 - _i].v, VEC_DATA_TYPE(DATA_TYPE, WIDTH0)), 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
            })                                                                                                                                                                                     \
        }                                                                                                                                                                                          \
    })

Store a tile to global memory (tensor) using an indirect Y index tile and conditionally use a different length for the store.

Note: If WIDTH1_CONDITION is true, the store uses the WIDTH1 length. The vectors are stored in reverse order so that the invalid rows are overwritten by the valid ones.

Parameters

[in]	DATA_TYPE	Data type
[in]	HEIGHT	Number of src rows
[in]	WIDTH0	Store width to use if WIDTH1_CONDITION = false
[in]	WIDTH1	Store width to use if WIDTH1_CONDITION = true
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported; cl_image is not supported.
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	WIDTH1_CONDITION	Condition to select the WIDTH1 store
[in]	src	Input tile
[in]	indirect_y	Indirect Y index tile

Definition at line 899 of file tile_helpers.h.

◆ TENSOR3D_T

#define TENSOR3D_T	(	name,
		type
	)	TENSOR3D_T_STR(name, type)

Definition at line 191 of file tile_helpers.h.

◆ TENSOR3D_T_BUFFER

#define TENSOR3D_T_BUFFER ( name )

Value:

    __global uchar *name##_ptr,  \
    uint        name##_stride_y, \
    uint        name##_stride_z, \
    uint        name##_w,   \
    uint        name##_h,   \
    uint        name##_n,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 181 of file tile_helpers.h.

◆ TENSOR3D_T_IMAGE

#define TENSOR3D_T_IMAGE ( name )

Value:

    __read_only image2d_t name##_img, \
    __global uchar *name##_ptr,       \
    uint        name##_stride_y, \
    uint        name##_stride_z, \
    uint        name##_w,   \
    uint        name##_h,   \
    uint        name##_n,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 171 of file tile_helpers.h.

◆ TENSOR3D_T_STR

#define TENSOR3D_T_STR	(	name,
		type
	)	TENSOR3D_T_##type(name)

Definition at line 190 of file tile_helpers.h.

◆ TENSOR4D

#define TENSOR4D	(	name,
		type
	)	TENSOR4D_STR(name, type)

Definition at line 107 of file tile_helpers.h.

◆ TENSOR4D_BUFFER

#define TENSOR4D_BUFFER ( name )

Value:

    __global uchar *name##_ptr,  \
    uint        name##_stride_x, \
    uint        name##_step_x,   \
    uint        name##_stride_y, \
    uint        name##_step_y,   \
    uint        name##_stride_z, \
    uint        name##_step_z,   \
    uint        name##_stride_w, \
    uint        name##_step_w,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 94 of file tile_helpers.h.

◆ TENSOR4D_IMAGE

#define TENSOR4D_IMAGE ( name )

Value:

    __read_only image2d_t name##_img, \
    __global uchar *name##_ptr,       \
    uint            name##_stride_x,  \
    uint            name##_step_x,    \
    uint            name##_stride_y,  \
    uint            name##_step_y,    \
    uint            name##_stride_z,  \
    uint            name##_step_z,    \
    uint            name##_stride_w,  \
    uint            name##_step_w,    \
    uint            name##_offset_first_element_in_bytes

Definition at line 81 of file tile_helpers.h.

◆ TENSOR4D_RO_T

#define TENSOR4D_RO_T	(	name,
		type
	)	TENSOR4D_RO_T_STR(name, type)

Read-Only (RO) tensor 4D.

Parameters

[in]	name	Tensor name. The tensor name is the prefix of the tensor components
[in]	type	Tensor type (BUFFER or IMAGE)

Definition at line 154 of file tile_helpers.h.

◆ TENSOR4D_RO_T_BUFFER

#define TENSOR4D_RO_T_BUFFER ( name ) TENSOR4D_T_BUFFER(name)

Definition at line 145 of file tile_helpers.h.

◆ TENSOR4D_RO_T_IMAGE

#define TENSOR4D_RO_T_IMAGE ( name )

Value:

    __read_only image2d_t name##_img, \
    TENSOR4D_T_BUFFER(name)

Definition at line 141 of file tile_helpers.h.

◆ TENSOR4D_RO_T_STR

#define TENSOR4D_RO_T_STR	(	name,
		type
	)	TENSOR4D_RO_T_##type(name)

Definition at line 147 of file tile_helpers.h.

◆ TENSOR4D_STR

#define TENSOR4D_STR	(	name,
		type
	)	TENSOR4D_##type(name)

Definition at line 106 of file tile_helpers.h.

◆ TENSOR4D_T

#define TENSOR4D_T	(	name,
		type
	)	TENSOR4D_T_STR(name, type)

Legacy tensor 4D arguments.

Parameters

[in]	name	Tensor name. The tensor name is the prefix of the tensor components
[in]	type	Tensor type (BUFFER or IMAGE)

Definition at line 139 of file tile_helpers.h.

◆ TENSOR4D_T_BUFFER

#define TENSOR4D_T_BUFFER ( name )

Value:

    __global uchar *name##_ptr,  \
    uint        name##_stride_y, \
    uint        name##_stride_z, \
    uint        name##_stride_w, \
    uint        name##_c,   \
    uint        name##_w,   \
    uint        name##_h,   \
    uint        name##_n,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 121 of file tile_helpers.h.

◆ TENSOR4D_T_IMAGE

#define TENSOR4D_T_IMAGE ( name )

Value:

    __read_only image2d_t name##_img, \
    __global uchar *name##_ptr,       \
    uint        name##_stride_y, \
    uint        name##_stride_z, \
    uint        name##_stride_w, \
    uint        name##_c,   \
    uint        name##_w,   \
    uint        name##_h,   \
    uint        name##_n,   \
    uint        name##_offset_first_element_in_bytes

Definition at line 109 of file tile_helpers.h.

◆ TENSOR4D_T_STR

#define TENSOR4D_T_STR	(	name,
		type
	)	TENSOR4D_T_##type(name)

Definition at line 132 of file tile_helpers.h.

◆ TENSOR4D_WO_T

#define TENSOR4D_WO_T	(	name,
		type
	)	TENSOR4D_WO_T_STR(name, type)

Write-Only (WO) tensor 4D.

Parameters

[in]	name	Tensor name. The tensor name is the prefix of the tensor components
[in]	type	Tensor type (BUFFER or IMAGE)

Definition at line 169 of file tile_helpers.h.

◆ TENSOR4D_WO_T_BUFFER

#define TENSOR4D_WO_T_BUFFER ( name ) TENSOR4D_T_BUFFER(name)

Definition at line 160 of file tile_helpers.h.

◆ TENSOR4D_WO_T_IMAGE

#define TENSOR4D_WO_T_IMAGE ( name )

Value:

    __write_only image2d_t name##_img, \
    TENSOR4D_T_BUFFER(name)

Definition at line 156 of file tile_helpers.h.

◆ TENSOR4D_WO_T_STR

#define TENSOR4D_WO_T_STR	(	name,
		type
	)	TENSOR4D_WO_T_##type(name)

Definition at line 162 of file tile_helpers.h.

◆ TILE

#define TILE	(	DATA_TYPE,
		H,
		W,
		BASENAME
	)	TILE_STR(DATA_TYPE, H, W, BASENAME)

Tile object. A tile object is a 2D memory block and can be accessed using the following syntax:

1. a[m0].v = access the vector at row "m0" (OpenCL vector)
2. a[m0].s[n0] = access the scalar element at row "m0" and column "n0" (scalar access)

Parameters

[in]	DATA_TYPE	Data type of the tile
[in]	H	Number of tile rows
[in]	W	Number of tile columns
[in]	BASENAME	Tile's name

Definition at line 74 of file tile_helpers.h.

◆ TILE_STR

#define TILE_STR	(	DATA_TYPE,
		H,
		W,
		BASENAME
	)

Value:

    union {                                 \
        DATA_TYPE                      s[TILE_VECTOR_SIZE##W];                  \
        TILE_VECTOR_TYPE##W(DATA_TYPE) v;                     \
    } BASENAME[H]

Definition at line 75 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE1

#define TILE_VECTOR_SIZE1 1

Definition at line 30 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE10

#define TILE_VECTOR_SIZE10 16

Definition at line 39 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE11

#define TILE_VECTOR_SIZE11 16

Definition at line 40 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE12

#define TILE_VECTOR_SIZE12 16

Definition at line 41 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE13

#define TILE_VECTOR_SIZE13 16

Definition at line 42 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE14

#define TILE_VECTOR_SIZE14 16

Definition at line 43 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE15

#define TILE_VECTOR_SIZE15 16

Definition at line 44 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE16

#define TILE_VECTOR_SIZE16 16

Definition at line 45 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE2

#define TILE_VECTOR_SIZE2 2

Definition at line 31 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE3

#define TILE_VECTOR_SIZE3 3

Definition at line 32 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE4

#define TILE_VECTOR_SIZE4 4

Definition at line 33 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE5

#define TILE_VECTOR_SIZE5 8

Definition at line 34 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE6

#define TILE_VECTOR_SIZE6 8

Definition at line 35 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE7

#define TILE_VECTOR_SIZE7 8

Definition at line 36 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE8

#define TILE_VECTOR_SIZE8 8

Definition at line 37 of file tile_helpers.h.

◆ TILE_VECTOR_SIZE9

#define TILE_VECTOR_SIZE9 16

Definition at line 38 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE1

#define TILE_VECTOR_TYPE1 ( DATA_TYPE ) DATA_TYPE##1

Definition at line 47 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE10

#define TILE_VECTOR_TYPE10 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 56 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE11

#define TILE_VECTOR_TYPE11 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 57 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE12

#define TILE_VECTOR_TYPE12 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 58 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE13

#define TILE_VECTOR_TYPE13 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 59 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE14

#define TILE_VECTOR_TYPE14 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 60 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE15

#define TILE_VECTOR_TYPE15 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 61 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE16

#define TILE_VECTOR_TYPE16 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 62 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE2

#define TILE_VECTOR_TYPE2 ( DATA_TYPE ) DATA_TYPE##2

Definition at line 48 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE3

#define TILE_VECTOR_TYPE3 ( DATA_TYPE ) DATA_TYPE##3

Definition at line 49 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE4

#define TILE_VECTOR_TYPE4 ( DATA_TYPE ) DATA_TYPE##4

Definition at line 50 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE5

#define TILE_VECTOR_TYPE5 ( DATA_TYPE ) DATA_TYPE##8

Definition at line 51 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE6

#define TILE_VECTOR_TYPE6 ( DATA_TYPE ) DATA_TYPE##8

Definition at line 52 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE7

#define TILE_VECTOR_TYPE7 ( DATA_TYPE ) DATA_TYPE##8

Definition at line 53 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE8

#define TILE_VECTOR_TYPE8 ( DATA_TYPE ) DATA_TYPE##8

Definition at line 54 of file tile_helpers.h.

◆ TILE_VECTOR_TYPE9

#define TILE_VECTOR_TYPE9 ( DATA_TYPE ) DATA_TYPE##16

Definition at line 55 of file tile_helpers.h.

◆ UNROLL_INCR

#define UNROLL_INCR	(	idx,
		step,
		macro
	)	idx += (step); (macro)

Definition at line 194 of file tile_helpers.h.

◆ V_ADD

#define V_ADD	(	A_VAL,
		B_VAL
	)	((A_VAL) + (B_VAL))

Definition at line 1163 of file tile_helpers.h.

◆ V_DIV

#define V_DIV	(	A_VAL,
		B_VAL
	)	((A_VAL) / (B_VAL))

Definition at line 1165 of file tile_helpers.h.

◆ V_LOAD

#define V_LOAD	(	DATA_TYPE,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		STRIDE_Y
	)	V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y)

Load a vector from global memory (tensor)

Parameters

[in]	DATA_TYPE	Data type
[in]	WIDTH	Number of dst columns
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	Y	Starting Y position
[in]	STRIDE_Y	Stride Y (in bytes)

Definition at line 492 of file tile_helpers.h.

◆ V_LOAD_BUFFER

#define V_LOAD_BUFFER	(	DATA_TYPE,
		WIDTH,
		TENSOR,
		X,
		Y,
		STRIDE_Y
	)

Value:

    VLOAD(WIDTH) \
    (0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y)))

Definition at line 494 of file tile_helpers.h.

◆ V_LOAD_IMAGE

#define V_LOAD_IMAGE	(	DATA_TYPE,
		WIDTH,
		TENSOR,
		X,
		Y,
		STRIDE_Y
	)	READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))

Definition at line 497 of file tile_helpers.h.

◆ V_LOAD_STR

#define V_LOAD_STR	(	DATA_TYPE,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		STRIDE_Y
	)	V_LOAD_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)

Definition at line 493 of file tile_helpers.h.

◆ V_MUL

#define V_MUL	(	A_VAL,
		B_VAL
	)	((A_VAL) * (B_VAL))

Definition at line 1166 of file tile_helpers.h.

◆ V_STORE

#define V_STORE	(	DATA_TYPE,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		STRIDE_Y,
		VALUES
	)	V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES)

Store a vector in global memory (tensor)

Parameters

[in]	DATA_TYPE	Data type
[in]	WIDTH	Number of dst columns
[in]	TENSOR_TYPE	Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
[in]	TENSOR	Tensor basename
[in]	X	Starting X position
[in]	Y	Starting Y position
[in]	STRIDE_Y	Stride Y (in bytes)
[in]	VALUES	Values to store in memory

Definition at line 511 of file tile_helpers.h.

◆ V_STORE_BUFFER

#define V_STORE_BUFFER	(	DATA_TYPE,
		WIDTH,
		TENSOR,
		X,
		Y,
		STRIDE_Y,
		VALUES
	)

Value:

    VSTORE(WIDTH) \
    (VALUES, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y)))

Definition at line 513 of file tile_helpers.h.

◆ V_STORE_IMAGE

#define V_STORE_IMAGE	(	DATA_TYPE,
		WIDTH,
		TENSOR,
		X,
		Y,
		STRIDE_Y,
		VALUES
	)	WRITE_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y), VALUES)

Definition at line 516 of file tile_helpers.h.

◆ V_STORE_STR

#define V_STORE_STR	(	DATA_TYPE,
		WIDTH,
		TENSOR_TYPE,
		TENSOR,
		X,
		Y,
		STRIDE_Y,
		VALUES
	)	V_STORE_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES)

Definition at line 512 of file tile_helpers.h.

◆ V_SUB

#define V_SUB	(	A_VAL,
		B_VAL
	)	((A_VAL) - (B_VAL))

Definition at line 1164 of file tile_helpers.h.

Macros

Macro Definition Documentation

◆ ACT_OP_QUANTIZED

◆ ACTIVATION_QUANTIZED

◆ brelu_op_quantized

◆ COPY_16_VECTOR_TO_TILE_COLUMN

◆ COPY_1_VECTOR_TO_TILE_COLUMN

◆ COPY_2_VECTOR_TO_TILE_COLUMN

◆ COPY_3_VECTOR_TO_TILE_COLUMN

◆ COPY_4_VECTOR_TO_TILE_COLUMN

◆ COPY_8_VECTOR_TO_TILE_COLUMN

◆ COPY_VECTOR_TO_TILE_COLUMN

◆ COPY_VECTOR_TO_TILE_COLUMN_STR

◆ DOT_PRODUCT10_INTEGER8

◆ DOT_PRODUCT11_INTEGER8

◆ DOT_PRODUCT12_INTEGER8

◆ DOT_PRODUCT13_INTEGER8

◆ DOT_PRODUCT14_INTEGER8

◆ DOT_PRODUCT15_INTEGER8

◆ DOT_PRODUCT16_INTEGER8

◆ DOT_PRODUCT1_INTEGER8

◆ DOT_PRODUCT2_INTEGER8

◆ DOT_PRODUCT3_INTEGER8

◆ DOT_PRODUCT4_INTEGER8

◆ DOT_PRODUCT5_INTEGER8

◆ DOT_PRODUCT6_INTEGER8

◆ DOT_PRODUCT7_INTEGER8

◆ DOT_PRODUCT8_INTEGER8

◆ DOT_PRODUCT9_INTEGER8

◆ DOT_PRODUCT_INTEGER8

◆ DOT_PRODUCT_INTEGER8_STR

◆ GET_SPATIAL_IDX

◆ hard_swish_op_quantized

◆ identity_op_quantized

◆ LOOP_UNROLLING

◆ LOOP_UNROLLING_1

◆ LOOP_UNROLLING_10

◆ LOOP_UNROLLING_100

◆ LOOP_UNROLLING_101

◆ LOOP_UNROLLING_102

◆ LOOP_UNROLLING_103

◆ LOOP_UNROLLING_104

◆ LOOP_UNROLLING_105

◆ LOOP_UNROLLING_106

◆ LOOP_UNROLLING_107

◆ LOOP_UNROLLING_108

◆ LOOP_UNROLLING_109

◆ LOOP_UNROLLING_11

◆ LOOP_UNROLLING_110

◆ LOOP_UNROLLING_111

◆ LOOP_UNROLLING_112

◆ LOOP_UNROLLING_113

◆ LOOP_UNROLLING_114

◆ LOOP_UNROLLING_115

◆ LOOP_UNROLLING_116

◆ LOOP_UNROLLING_117

◆ LOOP_UNROLLING_118

◆ LOOP_UNROLLING_119

◆ LOOP_UNROLLING_12

◆ LOOP_UNROLLING_120

◆ LOOP_UNROLLING_121

◆ LOOP_UNROLLING_122

◆ LOOP_UNROLLING_123

◆ LOOP_UNROLLING_124

◆ LOOP_UNROLLING_125

◆ LOOP_UNROLLING_126

◆ LOOP_UNROLLING_127

◆ LOOP_UNROLLING_128

◆ LOOP_UNROLLING_13

◆ LOOP_UNROLLING_14

◆ LOOP_UNROLLING_15

◆ LOOP_UNROLLING_16

◆ LOOP_UNROLLING_17

◆ LOOP_UNROLLING_18

◆ LOOP_UNROLLING_19

◆ LOOP_UNROLLING_2

◆ LOOP_UNROLLING_20

◆ LOOP_UNROLLING_21

◆ LOOP_UNROLLING_22

◆ LOOP_UNROLLING_23