// Saturating float-conversion helper: pastes `type` and `round` into a
// convert_<type>_sat<round>(x) call.
27 #define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x))
// Non-saturating form of the same helper: convert_<type><round>(x).
// NOTE(review): both definitions share one name; the preprocessor conditional
// that selects between them is not visible in this excerpt - confirm against the full file.
29 #define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x))
// Extra level of indirection so that macro arguments are expanded before
// CONVERT_OP_FLOAT_STR applies the ## token pasting.
31 #define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round)
// The code that follows is compiled only when both input data types, the
// accumulator data type and the output data type are supplied as macros.
33 #if defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(ACC_DATA_TYPE) && defined(DATA_TYPE_OUT)
// NOTE(review): the body of this ACTIVATION_TYPE conditional is not visible in this excerpt.
35 #if defined(ACTIVATION_TYPE)
37 #endif // defined(ACTIVATION_TYPE)
// Vector type aliases, each VEC_SIZE_OUT lanes wide:
// accumulator element type, output element type, and float.
39 #define VEC_ACC_TYPE VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE_OUT)
40 #define VEC_OUT_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)
41 #define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE_OUT)
// Kernel that multiplies two inputs element by element, applies `scale`, and
// stores the (optionally activated) result.
// NOTE(review): the parameter list, the declaration of out_addr and several
// other body lines are not visible in this excerpt; the comments below describe
// only the statements that are shown.
77 __kernel
void pixelwise_mul_float(
80 #
if !defined(IN_PLACE)
// x: first element handled by this work-item along dimension 0. Each work-item
// covers VEC_SIZE_OUT elements; the start is shifted back by
// (VEC_SIZE_OUT - VEC_SIZE_LEFTOVER) % VEC_SIZE_OUT and clamped to 0 by max().
86 size_t x = max((
int)(get_global_id(0) * VEC_SIZE_OUT - (VEC_SIZE_OUT - VEC_SIZE_LEFTOVER) % VEC_SIZE_OUT), 0);
// y and z come straight from global dimensions 1 and 2.
87 size_t y = get_global_id(1);
88 size_t z = get_global_id(2);
// Byte addresses of the two inputs: base pointer + offset of the first element
// + coordinate times stride for each of the three dimensions.
90 __global uchar *in1_addr = in1_ptr + in1_offset_first_element_in_bytes + x * in1_stride_x + y * in1_stride_y + z * in1_stride_z;
91 __global uchar *in2_addr = in2_ptr + in2_offset_first_element_in_bytes + x * in2_stride_x + y * in2_stride_y + z * in2_stride_z;
// Output address: computed from the out_* values when IN_PLACE is not defined.
// NOTE(review): the bodies of the in-place branches (SRC1_IN_PLACE and its
// alternative) are not visible in this excerpt.
93 #if !defined(IN_PLACE)
94 out_addr = out_ptr + out_offset_first_element_in_bytes + x * out_stride_x + y * out_stride_y + z * out_stride_z;
95 #else // !defined(IN_PLACE)
96 #if defined(SRC1_IN_PLACE)
98 #else //defined(SRC1_IN_PLACE)
100 #endif //defined(SRC1_IN_PLACE)
101 #endif // !defined(IN_PLACE)
// Load VEC_SIZE_IN1 / VEC_SIZE_IN2 elements from each input, cast the loaded
// value to a VEC_SIZE_OUT-wide vector of the input element type, then convert
// it to the accumulator vector type.
// NOTE(review): presumably each input vector size is either 1 (broadcast) or
// VEC_SIZE_OUT - confirm against the host-side build options.
104 VEC_ACC_TYPE in1_data =
CONVERT((
VEC_DATA_TYPE(DATA_TYPE_IN1, VEC_SIZE_OUT))(
VLOAD(VEC_SIZE_IN1)(0, (__global DATA_TYPE_IN1 *)in1_addr)), VEC_ACC_TYPE);
105 VEC_ACC_TYPE in2_data =
CONVERT((
VEC_DATA_TYPE(DATA_TYPE_IN2, VEC_SIZE_OUT))(
VLOAD(VEC_SIZE_IN2)(0, (__global DATA_TYPE_IN2 *)in2_addr)), VEC_ACC_TYPE);
// Float path: multiply in the accumulator type, apply `scale` (cast to
// ACC_DATA_TYPE), and convert the product to the output vector type.
// NOTE(review): the alternative branch and the closing directive of this
// #ifdef are not visible in this excerpt.
108 #ifdef DATA_TYPE_FLOAT
109 VEC_OUT_TYPE res0 =
CONVERT(in1_data * in2_data * (ACC_DATA_TYPE)
scale, VEC_OUT_TYPE);
// Optional activation applied to res0 with parameters A_VAL and B_VAL.
114 #if defined(ACTIVATION_TYPE)
115 res0 =
ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, VEC_SIZE_OUT, res0, A_VAL, B_VAL);
116 #endif // defined(ACTIVATION_TYPE)
// Store the result at out_addr. The last argument is true only for work-item 0
// along dimension 0 when VEC_SIZE_LEFTOVER is non-zero.
// NOTE(review): the first argument is `res` while the local above is `res0`;
// presumably STORE_VECTOR_SELECT appends an index to the base name - confirm
// against the macro definition.
118 STORE_VECTOR_SELECT(res, DATA_TYPE_OUT, out_addr, VEC_SIZE_OUT, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
// The complex kernel is compiled only when DATA_TYPE is defined.
122 #if defined(DATA_TYPE)
// Kernel that multiplies two complex values stored as two-component vectors.
// NOTE(review): the parameter list and the declarations of in1, in2, out,
// vin1, vin2 and res are not visible in this excerpt.
151 __kernel
void pixelwise_mul_complex(
// Load one two-component value from each input.
163 vin1 = vload2(0, (__global DATA_TYPE *)in1.ptr);
165 vin2 = vload2(0, (__global DATA_TYPE *)in2.ptr);
// Complex product with .x as the real part and .y as the imaginary part:
// (a + bi)(c + di) = (ac - bd) + (ad + bc)i.
169 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
// Store the two-component result, passing it through ACTIVATION first when
// ACTIVATION_TYPE is defined.
// NOTE(review): ACTIVATION is invoked with VEC_SIZE_OUT although the value is
// stored with vstore2 - confirm the vector size passed here is intended.
171 #if defined(ACTIVATION_TYPE)
172 vstore2(
ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE_OUT, res, A_VAL, B_VAL), 0, (__global DATA_TYPE *)out.
ptr);
173 #else // defined(ACTIVATION_TYPE)
175 vstore2(res, 0, (__global DATA_TYPE *)out.
ptr);
176 #endif // defined(ACTIVATION_TYPE)
179 #endif // defined(DATA_TYPE)