27 #if defined(FLOAT_DATA_TYPE)
28 #define ISGREATER(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isgreater(x, y))
29 #define ISLESS(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isless(x, y))
30 #define ISGREATER_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isgreater(x, y))
31 #define ISLESS_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isless(x, y))
32 #else // !FLOAT_DATA_TYPE
34 #define ISGREATER(x, y) (x > y) ? 1 : 0
35 #define ISLESS(x, y) (x < y) ? 1 : 0
36 #define ISGREATER_SCALAR ISGREATER
37 #define ISLESS_SCALAR ISLESS
38 #else // !defined(WIDTH)
39 #define ISGREATER(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x > y)
40 #define ISLESS(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x < y)
41 #endif // defined(WIDTH)
42 #endif // defined(FLOAT_DATA_TYPE)
45 #if defined(OPERATION)
47 #define sum(in0, in1, size) (in0 + SUM_REDUCE(in1, size))
48 #define square_sum(in0, in1, size) (in0 + SUM_REDUCE((in1 * in1), size))
49 #define product(in0, in1, size) (in0 * PROD_REDUCE(in1, size))
50 #define min_(in0, in1, size) (min(in0, MIN_REDUCE(in1, size)))
51 #define max_(in0, in1, size) (max(in0, MAX_REDUCE(in1, size)))
74 __kernel
void reduction_operation_x(
78 int y = get_global_id(1);
79 int z = get_global_id(2);
81 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + y * input_stride_y + z * input_stride_z;
82 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + y * output_stride_y + z * output_stride_z;
84 #if !defined(MIN) && !defined(MAX)
86 DATA_TYPE res = (DATA_TYPE)1;
87 #else // defined(PROD)
88 DATA_TYPE res = (DATA_TYPE)0;
89 #endif // defined(PROD)
90 #else // #if !defined(MIN) && !defined(MAX)
91 DATA_TYPE res = *((__global DATA_TYPE *)input_addr);
92 #endif // #if defined(MIN) || defined(MAX)
98 vals =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + x *
sizeof(DATA_TYPE)));
99 res = OPERATION(res, vals,
VEC_SIZE);
102 #if(WIDTH % VEC_SIZE)
103 _Pragma(
"unroll")
for(; x < WIDTH; ++x)
105 DATA_TYPE val = *((__global DATA_TYPE *)(input_addr + x *
sizeof(DATA_TYPE)));
106 res = OPERATION(res, val, 1);
108 #endif // (WIDTH % VEC_SIZE)
112 #endif // defined(MEAN)
113 *((__global DATA_TYPE *)output_addr) = res;
115 #endif // defined(OPERATION)
131 __kernel
void reduction_operation_non_parallel_x(
141 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
142 float res_f =
DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
143 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
145 for(
unsigned int x = 1; x <
WIDTH; ++x)
149 res =
select(res, in, ISLESS_SCALAR(in, res));
151 res =
select(res, in, ISGREATER_SCALAR(in, res));
153 #if defined(OFFSET) && defined(SCALE)
154 res_f *=
DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
155 #else // !(defined(OFFSET) && defined(SCALE))
157 #endif // defined(OFFSET) && defined(SCALE)
158 #else // defined(SUM))
160 #endif // defined(MAX) || defined(MIN) || defined(PROD)
166 #endif // defined(MEAN)
169 #if defined(SUM) && defined(OFFSET) && defined(SCALE)
170 res -= (
WIDTH - 1) * OFFSET;
171 #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
174 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
175 res =
QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
176 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
178 *((__global DATA_TYPE *)output.
ptr) =
CONVERT_SAT(res, DATA_TYPE);
180 #endif // defined(WIDTH)
196 __kernel
void reduction_operation_y(
197 __global uchar *input_ptr,
200 uint input_offset_first_element_in_bytes,
202 __global uchar *output_ptr,
203 uint output_stride_z,
204 uint output_offset_first_element_in_bytes)
207 int z = get_global_id(1);
209 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + z * input_stride_z;
210 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + z * output_stride_z;
216 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
219 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
221 #if defined(SUM_SQUARE)
223 #endif // defined(SUM_SQUARE)
225 for(
unsigned int y = 1; y <
HEIGHT; ++y)
233 #else // !(defined(MAX) || defined(MIN))
234 #if defined(SUM_SQUARE)
236 #endif // defined(SUM_SQUARE)
239 #if defined(OFFSET) && defined(SCALE)
241 #else // !(defined(OFFSET) && defined(SCALE))
243 #endif // defined(OFFSET) && defined(SCALE)
245 #else // !defined(PROD)
247 #endif // defined(PROD)
248 #endif // defined(MAX) || defined(MIN)
253 #endif // defined(MEAN)
256 #if defined(SUM) && defined(OFFSET) && defined(SCALE)
257 res -= (
HEIGHT - 1) * OFFSET;
258 #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
261 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
263 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
270 #endif // defined(HEIGHT)
288 __kernel
void reduction_operation_z(
289 __global uchar *input_ptr,
293 uint input_offset_first_element_in_bytes,
295 __global uchar *output_ptr,
296 uint output_stride_y,
297 uint output_stride_w,
298 uint output_offset_first_element_in_bytes)
301 int y = get_global_id(1);
302 int w = get_global_id(2);
304 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * input_stride_y +
w * input_stride_w;
305 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * output_stride_y +
w * output_stride_w;
311 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
314 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
316 #if defined(SUM_SQUARE)
318 #endif // defined(SUM_SQUARE)
320 for(
unsigned int z = 1; z <
DEPTH; ++z)
329 #else // !(defined(MAX) || defined(MIN))
330 #if defined(SUM_SQUARE)
332 #endif // defined(SUM_SQUARE)
335 #if defined(OFFSET) && defined(SCALE)
337 #else // !(defined(OFFSET) && defined(SCALE))
339 #endif // defined(OFFSET) && defined(SCALE)
341 #else // !defined(PROD)
343 #endif // defined(PROD)
344 #endif // defined(MAX) || defined(MIN)
349 #endif // defined(MEAN)
352 #if defined(SUM) && defined(OFFSET) && defined(SCALE)
353 res -= (
DEPTH - 1) * OFFSET;
354 #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
357 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
359 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
369 #if defined(BATCH) && defined(DEPTH)
388 __kernel
void reduction_operation_w(
389 __global uchar *input_ptr,
394 uint input_offset_first_element_in_bytes,
396 __global uchar *output_ptr,
397 uint output_stride_y,
398 uint output_stride_z,
399 uint output_stride_v,
400 uint output_offset_first_element_in_bytes)
403 int y = get_global_id(1);
405 int gid_2 = get_global_id(2);
406 int z = get_global_id(2) %
DEPTH;
407 int v = get_global_id(2) /
DEPTH;
409 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * input_stride_y + z * input_stride_z + v * input_stride_v;
410 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * output_stride_y + z * output_stride_z + v * output_stride_v;
416 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
419 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
421 #if defined(SUM_SQUARE)
423 #endif // defined(SUM_SQUARE)
425 for(
unsigned int w = 1;
w < BATCH; ++
w)
434 #else // !(defined(MAX) || defined(MIN))
435 #if defined(SUM_SQUARE)
437 #endif // defined(SUM_SQUARE)
440 #if defined(OFFSET) && defined(SCALE)
442 #else // !(defined(OFFSET) && defined(SCALE))
444 #endif // defined(OFFSET) && defined(SCALE)
446 #else // !defined(PROD)
448 #endif //defined(PROD)
449 #endif // defined(MAX) || defined(MIN)
454 #endif // defined(MEAN)
457 #if defined(SUM) && defined(OFFSET) && defined(SCALE)
458 res -= (BATCH - 1) * OFFSET;
459 #endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
462 #if defined(PROD) && defined(OFFSET) && defined(SCALE)
464 #endif // defined(PROD) && defined(OFFSET) && defined(SCALE)