// Small epsilon subtracted from the bin size before ceil() when the sampling
// grid is derived from the bin size (i.e. when SAMPLING_RATIO is not defined).
28 #define EPS_GRID 0.00001f
30 #if defined(DATA_TYPE) && defined(POOLED_DIM_X) && defined(POOLED_DIM_Y) && defined(MAX_DIM_X) && defined(MAX_DIM_Y) && defined(MAX_DIM_Z) && defined(SPATIAL_SCALE) // Check for compile time constants
// Visit every sampling point of the grid_size_x by grid_size_y grid.
55 for(
int iy = 0; iy < grid_size_y; ++iy)
57 for(
int ix = 0; ix < grid_size_x; ++ix)
// Sample position: region start plus the centre (index + 0.5) of the grid
// cell, where each cell spans bin_size / grid_size along its axis.
60 const float y = region_start_y + (iy + 0.5f) * bin_size_y / (
float)grid_size_y;
61 const float x = region_start_x + (ix + 0.5f) * bin_size_x / (
float)grid_size_x;
// Integer neighbours of the sample: the cast truncates to the low index and
// the high index is the next one up.
// NOTE(review): no clamping of y_high / x_high is visible here - confirm the
// elided lines keep them inside the tensor.
64 const int y_low = (int)y;
65 const int x_low = (int)x;
66 const int y_high = y_low + 1;
67 const int x_high = x_low + 1;
// Fractional distance from the low neighbour (ly, lx) and its complement
// (hy, hx) along each axis.
69 const float ly = y - y_low;
70 const float lx = x - x_low;
71 const float hy = 1.f - ly;
72 const float hx = 1.f - lx;
// Bilinear weights: each is the product of one vertical and one horizontal
// factor, so the four weights sum to 1.
74 const float w1 = hy * hx;
75 const float w2 = hy * lx;
76 const float w3 = ly * hx;
77 const float w4 = ly * lx;
83 #else // !defined(NHWC)
88 #endif // defined(NHWC)
// Accumulate the weighted combination of data1..data4.
// NOTE(review): presumably data1..data4 are the values at (y_low, x_low),
// (y_low, x_high), (y_high, x_low), (y_high, x_high) in that order - confirm
// against the loads, which are not shown here.
89 sum += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4;
// Average over the number of sampling points and convert to DATA_TYPE.
93 return (DATA_TYPE)(sum / (grid_size_x * grid_size_y));
135 unsigned int input_stride_w,
unsigned int output_stride_w)
// Map the work-item IDs to the pooled output position (px, py) and to pw,
// which indexes the rois tensor and the output slice below.
// With NHWC the global IDs on dimensions (0, 1, 2) are (pw, px, py);
// otherwise they are (px, py, pw).
143 const int px = get_global_id(1);
144 const int py = get_global_id(2);
145 const int pw = get_global_id(0);
146 #else // !defined(NHWC)
147 const int px = get_global_id(0);
148 const int py = get_global_id(1);
149 const int pw = get_global_id(2);
150 #endif // defined(NHWC)
// Element 0 of ROI entry pw is read as DATA_TYPE and converted to ushort; it
// is used below to offset the input pointer.
154 const ushort roi_batch = (ushort) * ((__global DATA_TYPE *)
offset(&rois, 0, pw));
// The four values starting at element 1 of ROI entry pw are loaded as a vector.
// NOTE(review): presumably (x1, y1, x2, y2), since s01 is used as the anchor
// and s23 - s01 as the extent - confirm against the producer of rois.
156 roi = vload4(0, (__global DATA_TYPE *)
offset(&rois, 1, pw));
// Scale the ROI anchor and extent by SPATIAL_SCALE; fmax forces the scaled
// extent to be at least 1 along each axis.
157 const float2 roi_anchor = convert_float2(roi.s01) * convert_float(SPATIAL_SCALE);
158 const float2 roi_dims = fmax(convert_float2(roi.s23 - roi.s01) * convert_float(SPATIAL_SCALE), 1.f);
// Per-axis quantities packed as float2, with x in s0 and y in s1.
161 const float2 spatial_indx = (float2)(px, py);
162 const float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y);
163 const float2 max_spatial_dims = (float2)(MAX_DIM_X, MAX_DIM_Y);
// Size of one pooled bin: the ROI extent divided by the pooled dimension.
165 const float2 bin_size = (float2)((roi_dims.s0 / (
float)POOLED_DIM_X), (roi_dims.s1 / (
float)POOLED_DIM_Y));
// Bounds of the bin handled by this work-item: bin index (and index + 1)
// times the bin size, offset by the ROI anchor.
166 float2 region_start = spatial_indx * bin_size + roi_anchor;
167 float2 region_end = (spatial_indx + 1) * bin_size + roi_anchor;
// Clamp both bounds to the range [0, MAX_DIM_X] x [0, MAX_DIM_Y].
169 region_start =
clamp(region_start, 0, max_spatial_dims);
170 region_end =
clamp(region_end, 0, max_spatial_dims);
// Sampling grid per bin: SAMPLING_RATIO on both axes when it is defined,
// otherwise derived from the bin size as ceil(bin_size - EPS_GRID).
172 #if defined(SAMPLING_RATIO)
173 const float2 roi_bin_grid = SAMPLING_RATIO;
174 #else // !defined(SAMPLING_RATIO)
176 const float2 roi_bin_grid = ceil(bin_size -
EPS_GRID);
177 #endif // defined(SAMPLING_RATIO)
// Offset the input pointer by roi_batch * input_stride_w and the output
// pointer by pw * output_stride_w.
180 input.ptr += roi_batch * input_stride_w;
181 output.
ptr += pw * output_stride_w;
// Iterate over every z index in [0, MAX_DIM_Z).
182 for(
int pz = 0; pz < MAX_DIM_Z; ++pz)
// Address of the output element for this (px, py, pz): the NHWC variant
// passes the indices as (pz, px, py), the other variant as (px, py, pz).
185 __global DATA_TYPE *_output_ptr = (__global DATA_TYPE *)
tensor3D_offset(&output, pz, px, py);
186 #else // !defined(NHWC)
187 __global DATA_TYPE *_output_ptr = (__global DATA_TYPE *)
tensor3D_offset(&output, px, py, pz);
188 #endif // defined(NHWC)
200 #endif // Check for compile time constants