26 #define ADD_OP(a, b) ((a) + (b)) 27 #define SUB_OP(a, b) ((a) - (b)) 28 #define MUL_OP(a, b) ((a) * (b)) 29 #define INVSQRT_OP(a) rsqrt((a)) 30 #define SQCVT_SAT(a) (a) 32 #if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(ACTIVATION_TYPE) 80 #ifndef USE_DEFAULT_BETA
83 #ifndef USE_DEFAULT_GAMMA
96 #ifndef USE_DEFAULT_BETA 99 #ifndef USE_DEFAULT_GAMMA 114 const int current_slice = get_global_id(2);
122 numerator =
SUB_OP(data, numerator);
123 x_bar =
MUL_OP(numerator, denominator);
125 #ifndef USE_DEFAULT_GAMMA 129 res =
MUL_OP(gamma_vec, x_bar);
135 #ifndef USE_DEFAULT_BETA 139 res =
ADD_OP(res, beta_vec);
193 #ifndef USE_DEFAULT_BETA
196 #ifndef USE_DEFAULT_GAMMA
203 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) *
input_stride_y + get_global_id(2) *
input_stride_z;
205 __global uchar *output_addr = input_ptr;
207 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
209 __global uchar *mean_addr = mean_ptr + mean_offset_first_element_in_bytes + x_offs;
210 __global uchar *var_addr = var_ptr + var_offset_first_element_in_bytes + x_offs;
211 #ifndef USE_DEFAULT_BETA 212 __global uchar *beta_addr = beta_ptr + beta_offset_first_element_in_bytes + x_offs;
214 #ifndef USE_DEFAULT_GAMMA 215 __global uchar *gamma_addr = gamma_ptr + gamma_offset_first_element_in_bytes + x_offs;
235 numerator =
SUB_OP(data, numerator);
236 x_bar =
MUL_OP(numerator, denominator);
238 #ifndef USE_DEFAULT_GAMMA 242 res0 =
MUL_OP(gamma_vec, x_bar);
248 #ifndef USE_DEFAULT_BETA 252 res0 =
ADD_OP(res0, beta_vec);
261 #if defined(DATA_TYPE) && defined(EPSILON) 340 int x = get_global_id(0);
341 int y = get_global_id(1);
342 int z = get_global_id(2);
347 #else // ! defined(DIM2) 351 #else // defined(NHWC) 353 #endif // defined(NHWC) 354 #endif // defined(DIM2) 356 int w_offset = x *
sizeof(
DATA_TYPE) + y * w_stride_y + z * w_stride_z;
368 w_old = *((__global
DATA_TYPE *)(w_ptr + w_offset + w_offset_first_element_in_bytes));
369 var = *((__global
DATA_TYPE *)(var_ptr + v_offset + var_offset_first_element_in_bytes));
370 mean = *((__global
DATA_TYPE *)(mean_ptr + v_offset + mean_offset_first_element_in_bytes));
373 gamma = *((__global
DATA_TYPE *)(gamma_ptr + v_offset + gamma_offset_first_element_in_bytes));
374 #endif // defined(GAMMA) 377 w_new = (gamma * w_old) / (sqrt(var + EPSILON));
379 #if defined(IN_PLACE_W) 380 *((__global
DATA_TYPE *)(w_ptr + w_offset + w_offset_first_element_in_bytes)) = w_new;
381 #else // defined(IN_PLACE_W) 382 *((__global
DATA_TYPE *)(w_fused_ptr + w_offset + w_fused_offset_first_element_in_bytes)) = w_new;
383 #endif // defined(IN_PLACE_W) 386 #if !defined(DIM2) && defined(NHWC) 388 #else // !defined(DIM2) && defined(NHWC) 389 if(x == 0 && y == 0 && c0 == 0)
390 #endif // !defined(DIM2) && defined(NHWC) 393 b_old = *((__global
DATA_TYPE *)(b_ptr + v_offset + b_offset_first_element_in_bytes));
394 #endif // defined(BIAS) 396 beta = *((__global
DATA_TYPE *)(beta_ptr + v_offset + beta_offset_first_element_in_bytes));
397 #endif // defined(BETA) 399 b_new = ((gamma * (b_old - mean)) / (sqrt(var + EPSILON))) + beta;
403 #if defined(IN_PLACE_B) 404 *((__global
DATA_TYPE *)(b_ptr + v_offset + b_offset_first_element_in_bytes)) = b_new;
405 #else // defined(IN_PLACE_B) 406 *((__global
DATA_TYPE *)(b_fused_ptr + v_offset + b_fused_offset_first_element_in_bytes)) = b_new;
407 #endif // defined(IN_PLACE_B) 409 #else // defined(BIAS) 412 *((__global
DATA_TYPE *)(b_fused_ptr + v_offset + b_fused_offset_first_element_in_bytes)) = b_new;
413 #endif // ifndef IN_PLACE_B 415 #endif // defined(BIAS) 418 #endif // defined(DATA_TYPE) && defined(EPSILON) Structure to hold Vector information.
Structure to hold 3D tensor information.
const size_t input_stride_y
#define CONVERT_TO_VECTOR_STRUCT(name)
#define VECTOR_DECLARATION(name)
int stride_x
Stride of the image in X dimension (in bytes)
__global uchar * ptr
Pointer to the starting postion of the buffer.
#define STORE_VECTOR_SELECT(basename, data_type, ptr, vec_size, leftover, cond)
Store a vector that can only be partial in x.
#define CONVERT_TO_TENSOR3D_STRUCT(name)
const size_t input_stride_z
__global uchar * ptr
Pointer to the starting postion of the buffer.
#define TENSOR3D_DECLARATION(name)
#define ACTIVATION(op, DATA_TYPE, VEC_SIZE, x, A_VAL, B_VAL)
#define VEC_DATA_TYPE(type, size)