28 #define EPS_GRID 0.00001f 30 #if defined(DATA_TYPE) && defined(POOLED_DIM_X) && defined(POOLED_DIM_Y) && defined(MAX_DIM_X) && defined(MAX_DIM_Y) && defined(MAX_DIM_Z) && defined(SPATIAL_SCALE) && defined(OFFSET_IN) && defined(OFFSET_OUT) && defined(SCALE_IN) && defined(SCALE_OUT) && defined(OFFSET_ROIS) && defined(SCALE_ROIS) // Check for compile time constants 55 for(
int iy = 0; iy < grid_size_y; ++iy)
57 for(
int ix = 0; ix < grid_size_x; ++ix)
60 const float y = region_start_y + (iy + 0.5f) * bin_size_y / (
float)grid_size_y;
61 const float x = region_start_x + (ix + 0.5f) * bin_size_x / (
float)grid_size_x;
64 const int y_low = (int)y;
65 const int x_low = (int)x;
66 const int y_high = y_low + 1;
67 const int x_high = x_low + 1;
69 const float ly = y - y_low;
70 const float lx = x - x_low;
71 const float hy = 1.f - ly;
72 const float hx = 1.f - lx;
74 const float w1 = hy * hx;
75 const float w2 = hy * lx;
76 const float w3 = ly * hx;
77 const float w4 = ly * lx;
83 #else // !defined(NHWC) 88 #endif // defined(NHWC) 94 sum += w1 * data1_f32 + w2 * data2_f32 + w3 * data3_f32 + w4 * data4_f32;
98 const float res_f32 = sum / (grid_size_x * grid_size_y);
138 __kernel
void roi_align_layer_quantized(
142 unsigned int input_stride_w,
unsigned int output_stride_w)
150 const int px = get_global_id(1);
151 const int py = get_global_id(2);
152 const int pw = get_global_id(0);
153 #else // !defined(NHWC) 154 const int px = get_global_id(0);
155 const int py = get_global_id(1);
156 const int pw = get_global_id(2);
157 #endif // defined(NHWC) 161 const ushort roi_batch = *((__global ushort *)
offset(&rois, 0, pw));
162 float4 roi =
DEQUANTIZE(vload4(0, (__global ushort *)
offset(&rois, 1, pw)), OFFSET_ROIS, SCALE_ROIS, ushort, 4);
163 float2 roi_anchor = roi.s01 * convert_float(SPATIAL_SCALE);
164 float2 roi_dims = fmax((roi.s23 - roi.s01) * convert_float(SPATIAL_SCALE), 1.f);
167 float2 spatial_indx = (float2)(px, py);
168 float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y);
169 float2 max_spatial_dims = (float2)(MAX_DIM_X, MAX_DIM_Y);
171 float2 bin_size = (float2)((roi_dims.s0 / (
float)POOLED_DIM_X), (roi_dims.s1 / (
float)POOLED_DIM_Y));
172 float2 region_start = spatial_indx * bin_size + roi_anchor;
173 float2 region_end = (spatial_indx + 1) * bin_size + roi_anchor;
175 region_start =
clamp(region_start, 0, max_spatial_dims);
176 region_end =
clamp(region_end, 0, max_spatial_dims);
178 #if defined(SAMPLING_RATIO) 179 float2 roi_bin_grid = SAMPLING_RATIO;
180 #else // !defined(SAMPLING_RATIO) 182 float2 roi_bin_grid = ceil(bin_size -
EPS_GRID);
183 #endif // defined(SAMPLING_RATIO) 186 input.
ptr += roi_batch * input_stride_w;
187 output.
ptr += pw * output_stride_w;
188 for(
int pz = 0; pz < MAX_DIM_Z; ++pz)
192 #else // !defined(NHWC) 194 #endif // defined(NHWC) 206 #endif // Check for compile time constants __global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
input_data_type roi_align_1x1(const ITensor *input, unsigned int roi_batch, float region_start_x, float bin_size_x, int grid_size_x, float region_end_x, float region_start_y, float bin_size_y, int grid_size_y, float region_end_y, int pz)
Average pooling over an aligned window.
DATA_TYPE sum(__global const DATA_TYPE *input)
Calculate sum of a vector.
#define IMAGE_DECLARATION(name)
Structure to hold 3D tensor information.
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Clamps a value between a lower and an upper bound.
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name)
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name)
#define DEQUANTIZE(input, offset, scale, type, size)
Structure to hold Image information.
#define QUANTIZE(input, offset, scale, type, size)
__global uchar * ptr
Pointer to the starting position of the buffer.
#define TENSOR3D_DECLARATION(name)
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.