26 #if defined(DATA_TYPE) && defined(INITIAL_VALUE)
27 #define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
29 #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
30 #define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE)
31 #define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
32 #define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
33 #define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
34 #define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res) \
36 const VEC_FLOAT(VEC_SIZE) in_f32 = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
37 const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset)); \
38 res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE)); \
43 #define POOL_OP(x, y) ((x) + (y))
45 #define POOL_OP(x, y) (max((x), (y)))
48 #define DIV_OP(x, y) (x * (1.f / y))
51 #error "L2 pooling is not supported"
54 #if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE)
92 __kernel
void pooling_layer_MxN_quantized_nhwc(
98 int offset_c = max((
int)(get_global_id(0) *
VEC_SIZE - (
VEC_SIZE - VEC_SIZE_LEFTOVER) %
VEC_SIZE), 0) *
sizeof(DATA_TYPE);
99 int idx_out_w = get_global_id(1);
100 #if DST_BATCH_SIZE != 1
102 int idx_out_h = get_global_id(2) % DST_HEIGHT;
103 int idx_out_n = get_global_id(2) / DST_HEIGHT;
104 #else //DST_BATCH_SIZE != 1
105 int idx_out_h = get_global_id(2);
107 #endif // DST_BATCH_SIZE != 1
109 int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
110 int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
112 __global
unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + offset_c + idx_out_n * input_stride_w;
114 __global
unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + offset_c + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_n * output_stride_w;
116 int pool_x_s = max((
int)0, -idx_in_w);
117 int pool_x_e = min((
int)POOL_SIZE_X, (
int)SRC_WIDTH - idx_in_w);
118 int pool_y_s = max((
int)0, -idx_in_h);
119 int pool_y_e = min((
int)POOL_SIZE_Y, (
int)SRC_HEIGHT - idx_in_h);
121 #if defined(POOL_AVG) && defined(EXCLUDE_PADDING)
123 #elif defined(POOL_AVG) && !defined(EXCLUDE_PADDING) // defined(POOL_AVG) && defined(EXCLUDE_PADDING)
124 int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
125 #endif // defined(POOL_AVG) && !defined(EXCLUDE_PADDING)
128 res0 = INITIAL_VALUE;
130 for(
int y = pool_y_s; y < pool_y_e; ++y)
132 for(
int x = pool_x_s; x < pool_x_e; ++x)
139 data =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + (x + idx_in_w) * input_stride_y + (y + idx_in_h) * input_stride_z));
144 #if defined(POOL_AVG) && defined(EXCLUDE_PADDING)
146 #endif // defined(POOL_AVG) && defined(EXCLUDE_PADDING)
150 #if defined(POOL_AVG)
152 #endif // defined(POOL_AVG)
156 #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
157 REQUANTIZE(
VEC_SIZE, out_q0, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q0);
161 STORE_VECTOR_SELECT(out_q, DATA_TYPE, out_base_ptr,
VEC_SIZE, VEC_SIZE_LEFTOVER, ((VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0));
163 #endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE)
164 #endif // defined(DATA_TYPE) && defined(INITIAL_VALUE)