#if defined(DATA_TYPE) && defined(INITIAL_VALUE)

/* Vector of VEC_SIZE elements of the kernel's configured element type. */
#define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
#define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
/* Round-to-nearest-even conversion via the OpenCL convert_<type>_rte built-ins. */
#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
/* Requantize a vector from the input quantization (in_offset, in_scale) to the
 * output quantization (out_offset, out_scale):
 *   res = saturate(rte((input - in_offset) * in_scale / out_scale + out_offset))
 * NOTE(review): the braces enclosing the macro body were lost in extraction and
 * are restored here; the three statements are exactly as in the source. */
#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res)                                                                                  \
    {                                                                                                                                                                 \
        const VEC_FLOAT(VEC_SIZE) in_f32  = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
        const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset));                            \
        res                               = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE));                                                \
    }
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */

/* Average pooling accumulates window elements; otherwise (max pooling) the
 * element-wise maximum is taken.  NOTE(review): the #if/#else/#endif guards
 * between the two mutually-exclusive POOL_OP definitions were dropped by
 * extraction; POOL_AVG is the natural reconstruction given that L2 pooling is
 * rejected outright by the #error below — confirm against the full file. */
#if defined(POOL_AVG)
#define POOL_OP(x, y) ((x) + (y))
#else /* defined(POOL_AVG) */
#define POOL_OP(x, y) (max((x), (y)))
#endif /* defined(POOL_AVG) */

/* Multiply by the reciprocal rather than dividing. */
#define DIV_OP(x, y) (x * (1.f / y))

#if defined(POOL_L2)
#error "L2 pooling is not supported"
#endif /* defined(POOL_L2) */

/** Averaging divisor for the current work-item's pooling window: the number of
 * window elements that actually overlap the input, with the end clamped to the
 * upper bounds.  With EXCLUDE_PADDING the window start is also clamped to the
 * input origin so padded positions are not counted.
 *
 * @return (end_y - start_y) * (end_x - start_x) for the clamped window.
 */
int calculate_avg_scale(const int pool_size_x,
                        const int pool_size_y,
                        const int upper_bound_w,
                        const int upper_bound_h,
                        const int pad_x,
                        const int pad_y,
                        const int stride_x,
                        const int stride_y)
{
    /* Top-left corner of this work-item's window; negative when it starts
     * inside the left/top padding region. */
    int start_x = get_global_id(0) * stride_x - pad_x;
    int start_y = get_global_id(1) * stride_y - pad_y;
    const int end_x = min(start_x + pool_size_x, upper_bound_w);
    const int end_y = min(start_y + pool_size_y, upper_bound_h);
#if defined(EXCLUDE_PADDING)
    /* Exclude padded positions from the divisor. */
    start_x = max(0, start_x);
    start_y = max(0, start_y);
#endif /* defined(EXCLUDE_PADDING) */
    return ((end_y - start_y) * (end_x - start_x));
}
/* ----------------------------------------------------------------------------
 * pooling_layer_MxN_quantized_nchw: quantized MxN pooling, NCHW data layout.
 *
 * NOTE(review): this chunk is an extraction artifact.  The leading integers on
 * the lines below are leftover source line numbers (not code), and several
 * original lines are missing: the kernel's parameter list, the per-iteration
 * loads of `data`, the POOL_OP accumulation statements, the #else/#endif
 * partners of the visible #if directives, and the final store.  Code is kept
 * byte-identical; only comments are added.
 * -------------------------------------------------------------------------- */
96 __kernel
void pooling_layer_MxN_quantized_nchw(
/* 8-lane vector accumulator plus a scalar accumulator for leftover columns,
 * both seeded with the build-time INITIAL_VALUE — presumably the identity of
 * POOL_OP (0 for sum, type minimum for max); confirm against build options. */
104 int8 vdata = INITIAL_VALUE;
105 int sdata = INITIAL_VALUE;
/* Walk the pooling window rows... */
108 for(
int y = 0; y < POOL_SIZE_Y; y++)
/* ...consuming 8 columns per step while at least 8 remain... */
111 for(; x <= ((int)POOL_SIZE_X - 8); x += 8)
115 int8 data0 = convert_int8(data);
/* ...then the remaining (<8) columns one at a time. */
120 for(; x < (
int)POOL_SIZE_X; ++x)
123 int data0 = convert_int(data);
/* Tree-reduce the 8 vector lanes with POOL_OP down to one scalar: 8->4->2->1. */
129 int4 reduce4 =
POOL_OP(vdata.s0123, vdata.s4567);
130 int2 reduce2 =
POOL_OP(reduce4.s01, reduce4.s23);
131 int res =
POOL_OP(reduce2.s0, reduce2.s1);
/* Average-pooling division happens in the missing lines (135-139); when the
 * requantization defines are present the result is then remapped from the
 * input to the output quantization in float arithmetic: */
134 #if defined(POOL_AVG) 140 #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) 142 const float result_f32 = convert_float(result_q8);
143 const float input_offset = (float)OFFSET_IN1;
144 const float input_scale = (float)SCALE_IN1;
145 const float scale_out = (float)SCALE_OUT;
146 const float offset_out = (float)OFFSET_OUT;
/* Dequantize with the input parameters, requantize with the output ones. */
147 const float in_f32 = (result_f32 - input_offset) * input_scale;
148 const float out_f32 = in_f32 / scale_out + offset_out;
/* ----------------------------------------------------------------------------
 * pooling_layer_MxN_quantized_nhwc: quantized MxN pooling, NHWC data layout,
 * vectorized along channels (VEC_SIZE).
 *
 * NOTE(review): extraction artifact — leading integers on the lines below are
 * leftover source line numbers, and several original lines are missing: the
 * kernel parameter list, the computation of offset_c, the declaration/type of
 * the accumulator res0, the inner-loop load + POOL_OP accumulation, the
 * averaging division, and the final store.  Code kept byte-identical;
 * comments only.
 * -------------------------------------------------------------------------- */
156 #if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE) 194 __kernel
void pooling_layer_MxN_quantized_nhwc(
/* Output coordinates of this work-item; dimension 0 (not visible in this
 * chunk) presumably indexes channels — confirm against the missing lines. */
201 int idx_out_w = get_global_id(1);
/* Batched case: global dimension 2 packs (batch, height); split it. */
202 #if DST_BATCH_SIZE != 1 204 int idx_out_h = get_global_id(2) % DST_HEIGHT;
205 int idx_out_n = get_global_id(2) / DST_HEIGHT;
206 #else //DST_BATCH_SIZE != 1 207 int idx_out_h = get_global_id(2);
/* Top-left input coordinate of the pooling window; negative while inside the
 * left/top padding. */
209 #endif // DST_BATCH_SIZE != 1 211 int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
212 int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
/* Byte base pointers for this work-item's input batch and output element. */
214 __global
unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + offset_c + idx_out_n * input_stride_w;
216 __global
unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + offset_c + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_n * output_stride_w;
/* Clamp the pooling window to the valid input region on each axis. */
218 int pool_x_s = max((
int)0, -idx_in_w);
219 int pool_x_e = min((
int)POOL_SIZE_X, (
int)SRC_WIDTH - idx_in_w);
220 int pool_y_s = max((
int)0, -idx_in_h);
221 int pool_y_e = min((
int)POOL_SIZE_Y, (
int)SRC_HEIGHT - idx_in_h);
/* AVG divisor: with EXCLUDE_PADDING it is derived from the clamped window
 * (that line, orig 224, is missing here); without it the full window size is
 * used.  The accumulator is then seeded with INITIAL_VALUE. */
223 #if defined(POOL_AVG) && defined(EXCLUDE_PADDING) 225 #elif defined(POOL_AVG) && !defined(EXCLUDE_PADDING) // defined(POOL_AVG) && defined(EXCLUDE_PADDING) 226 int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
227 #endif // defined(POOL_AVG) && !defined(EXCLUDE_PADDING) 230 res0 = INITIAL_VALUE;
/* Accumulate over the valid window rows/columns (the load and POOL_OP
 * statements inside the loops are missing from this chunk). */
232 for(
int y = pool_y_s; y < pool_y_e; ++y)
234 for(
int x = pool_x_s; x < pool_x_e; ++x)
/* Average division (missing lines under the POOL_AVG guards) and, when the
 * quantization defines are present, requantization of the result vector from
 * input to output quantization; the store follows in missing lines. */
246 #if defined(POOL_AVG) && defined(EXCLUDE_PADDING) 248 #endif // defined(POOL_AVG) && defined(EXCLUDE_PADDING) 252 #if defined(POOL_AVG) 254 #endif // defined(POOL_AVG) 258 #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) 259 REQUANTIZE(
VEC_SIZE, out_q0, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q0);
265 #endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE) 266 #endif // defined(DATA_TYPE) && defined(INITIAL_VALUE)
#define CONVERT_SAT(a, b)
for(size_t k=0;k< _target.size();++k)
Structure to hold 3D tensor information.
const size_t input_stride_y
ACC_DATA_TYPE calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y)
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
#define STORE_VECTOR_SELECT(basename, data_type, ptr, vec_size, leftover, cond)
Store a vector that can only be partial in x.
#define CONVERT_TO_TENSOR3D_STRUCT(name)
#define TENSOR4D_DECLARATION(name)
const size_t input_stride_z
__global uchar * ptr
Pointer to the starting position of the buffer.
#define TENSOR3D_DECLARATION(name)
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.
#define VEC_DATA_TYPE(type, size)