37 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 43 constexpr std::array<uint8_t, 256> b64_invtab =
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63,
48 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0,
49 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
50 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
51 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
52 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69 std::string decode_base64(
const std::string &
str)
71 constexpr
const char pad_char =
'=';
87 std::size_t padding = (str.rbegin()[0] == pad_char) + (str.rbegin()[1] == pad_char);
88 const int str_len = str.size();
93 dec_b64.reserve(((str_len / 4) * 3));
97 const int end = str_len - 4 - padding;
98 for(; c <=
end; c += 4)
100 const int byte0 = b64_invtab[str[c]];
101 const int byte1 = b64_invtab[str[c + 1]];
102 const int byte2 = b64_invtab[str[c + 2]];
103 const int byte3 = b64_invtab[str[c + 3]];
105 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
106 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
107 dec_b64.push_back((byte2 << 6) | (byte3));
113 const int byte0 = b64_invtab[str[c]];
114 const int byte1 = b64_invtab[str[c + 1]];
115 const int byte2 = b64_invtab[str[c + 2]];
117 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
118 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
120 else if(padding == 2)
122 const int byte0 = b64_invtab[str[c]];
123 const int byte1 = b64_invtab[str[c + 1]];
125 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
137 std::string decompress_zlib(
const std::string &str)
141 if(inflateInit(&ds) != Z_OK)
143 return std::string();
145 ds.avail_in = str.size();
146 ds.next_in = (Bytef *)str.data();
150 char roll_buff[16384];
151 std::string inflated_str;
154 ds.avail_out =
sizeof(roll_buff);
155 ds.next_out =
reinterpret_cast<Bytef *
>(roll_buff);
157 status = inflate(&ds, 0);
158 if(inflated_str.size() < ds.total_out)
160 inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
163 while(status == Z_OK);
167 if(status != Z_STREAM_END)
169 return std::string();
178 const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
180 {
"absdiff",
"absdiff.cl" },
181 {
"accumulate",
"accumulate.cl" },
182 {
"accumulate_squared",
"accumulate.cl" },
183 {
"accumulate_weighted",
"accumulate.cl" },
184 {
"activation_layer",
"activation_layer.cl" },
185 {
"activation_layer_quant",
"activation_layer_quant.cl" },
186 {
"activation_layer_quant_f32",
"activation_layer_quant.cl" },
187 {
"arg_min_max_x",
"arg_min_max.cl" },
188 {
"arg_min_max_y",
"arg_min_max.cl" },
189 {
"arg_min_max_z",
"arg_min_max.cl" },
190 {
"arg_min_max_w",
"arg_min_max.cl" },
191 {
"batch_to_space_nchw",
"batch_to_space.cl" },
192 {
"batch_to_space_static_nchw",
"batch_to_space.cl" },
193 {
"batch_to_space_nhwc",
"batch_to_space.cl" },
194 {
"batch_to_space_static_nhwc",
"batch_to_space.cl" },
195 {
"batchnormalization_layer_nchw",
"batchnormalization_layer.cl" },
196 {
"batchnormalization_layer_nhwc",
"batchnormalization_layer.cl" },
197 {
"bitwise_or",
"bitwise_op.cl" },
198 {
"bitwise_and",
"bitwise_op.cl" },
199 {
"bitwise_xor",
"bitwise_op.cl" },
200 {
"bitwise_not",
"bitwise_op.cl" },
201 {
"bounding_box_transform",
"bounding_box_transform.cl" },
202 {
"bounding_box_transform_quantized",
"bounding_box_transform_quantized.cl" },
203 {
"channel_combine_NV",
"channel_combine.cl" },
204 {
"channel_combine_RGB888",
"channel_combine.cl" },
205 {
"channel_combine_RGBA8888",
"channel_combine.cl" },
206 {
"channel_combine_UYVY422",
"channel_combine.cl" },
207 {
"channel_combine_YUYV422",
"channel_combine.cl" },
208 {
"channel_shuffle_nchw",
"channel_shuffle.cl" },
209 {
"channel_shuffle_nhwc",
"channel_shuffle.cl" },
210 {
"channel_extract_NV12",
"channel_extract.cl" },
211 {
"channel_extract_NV21",
"channel_extract.cl" },
212 {
"channel_extract_RGB888",
"channel_extract.cl" },
213 {
"channel_extract_RGBA8888",
"channel_extract.cl" },
214 {
"channel_extract_UYVY422",
"channel_extract.cl" },
215 {
"channel_extract_YUYV422",
"channel_extract.cl" },
216 {
"combine_gradients_L1",
"canny.cl" },
217 {
"combine_gradients_L2",
"canny.cl" },
218 {
"compare_equal",
"comparisons.cl" },
219 {
"compare_equal_quantized",
"comparisons.cl" },
220 {
"compare_notequal",
"comparisons.cl" },
221 {
"compare_notequal_quantized",
"comparisons.cl" },
222 {
"compare_greater",
"comparisons.cl" },
223 {
"compare_greater_quantized",
"comparisons.cl" },
224 {
"compare_greaterequal",
"comparisons.cl" },
225 {
"compare_greaterequal_quantized",
"comparisons.cl" },
226 {
"compare_less",
"comparisons.cl" },
227 {
"compare_less_quantized",
"comparisons.cl" },
228 {
"compare_lessequal",
"comparisons.cl" },
229 {
"compare_lessequal_quantized",
"comparisons.cl" },
230 {
"concatenate",
"concatenate.cl" },
231 {
"concatenate_width",
"concatenate.cl" },
232 {
"concatenate_height",
"concatenate.cl" },
233 {
"concatenate_width_x2",
"concatenate.cl" },
234 {
"concatenate_width_x4",
"concatenate.cl" },
235 {
"convolution_rectangle",
"convolution_rectangle.cl" },
236 {
"col2im",
"col2im.cl" },
237 {
"convert_depth_down",
"depth_convert.cl" },
238 {
"convert_depth_up",
"depth_convert.cl" },
239 {
"convert_fc_weights",
"convert_fc_weights.cl" },
240 {
"convolution3x3_static",
"convolution3x3.cl" },
241 {
"convolution5x5_static",
"convolution5x5.cl" },
242 {
"convolution7x7_static",
"convolution7x7.cl" },
243 {
"convolution9x9_static",
"convolution9x9.cl" },
244 {
"convolution_separable1x5_static",
"convolution5x5.cl" },
245 {
"convolution_separable5x1_static",
"convolution5x5.cl" },
246 {
"convolution_separable1x7_static",
"convolution7x7.cl" },
247 {
"convolution_separable7x1_static",
"convolution7x7.cl" },
248 {
"convolution_separable1x9_static",
"convolution9x9.cl" },
249 {
"convolution_separable9x1_static",
"convolution9x9.cl" },
250 {
"copy_tensor",
"copy_tensor.cl" },
251 {
"copy_plane",
"channel_extract.cl" },
252 {
"copy_planes_3p",
"channel_combine.cl" },
253 {
"copy_to_keypoint",
"fast_corners.cl" },
254 {
"crop_tensor",
"crop_tensor.cl" },
255 {
"deconvolution_reshape",
"deconvolution_layer.cl" },
256 {
"deconvolution_upsample",
"deconvolution_layer.cl" },
257 {
"depthwise_convolution_3x3",
"depthwise_convolution.cl" },
258 {
"depthwise_convolution_3x3_f16",
"depthwise_convolution.cl" },
259 {
"depthwise_convolution_3x3_nhwc",
"depthwise_convolution.cl" },
260 {
"depthwise_convolution_3x3_nhwc_stride1",
"depthwise_convolution.cl" },
261 {
"dwc_MxN_native_fp_nhwc",
"depthwise_convolution.cl" },
262 {
"dwc_MxN_native_quantized8_nhwc",
"depthwise_convolution_quantized.cl" },
263 {
"dwc_3x3_native_quantized8_nchw",
"depthwise_convolution_quantized.cl" },
264 {
"dwc_3x3_native_quantized8_dot8_nchw",
"depthwise_convolution_quantized.cl" },
265 {
"dwc_3x3_reshaped_quantized8_nhwc",
"depthwise_convolution_quantized.cl" },
266 {
"dwc_3x3_reshaped_quantized8_stride1_nhwc",
"depthwise_convolution_quantized.cl" },
267 {
"dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc",
"depthwise_convolution_quantized.cl" },
268 {
"depth_to_space_nchw",
"depth_to_space.cl" },
269 {
"depth_to_space_nhwc",
"depth_to_space.cl" },
270 {
"depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16",
"depthwise_convolution.cl" },
271 {
"depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16",
"depthwise_convolution.cl" },
272 {
"depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32",
"depthwise_convolution.cl" },
273 {
"depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32",
"depthwise_convolution.cl" },
274 {
"depthwise_convolution_reshape_weights",
"depthwise_convolution.cl" },
275 {
"dequantization_layer",
"dequantization_layer.cl" },
276 {
"dequantization_layer_per_channel_nhwc",
"dequantization_layer.cl" },
277 {
"dequantization_layer_per_channel_nchw",
"dequantization_layer.cl" },
278 {
"derivative",
"derivative.cl" },
279 {
"dilate",
"dilate.cl" },
280 {
"direct_convolution_nhwc",
"direct_convolution.cl" },
281 {
"direct_convolution1x1",
"direct_convolution1x1.cl" },
282 {
"direct_convolution1x1_f32_bifrost",
"direct_convolution1x1.cl" },
283 {
"direct_convolution3x3",
"direct_convolution3x3.cl" },
284 {
"direct_convolution3x3_f32_bifrost",
"direct_convolution3x3.cl" },
285 {
"direct_convolution5x5",
"direct_convolution5x5.cl" },
286 {
"direct_convolution5x5_f32_bifrost",
"direct_convolution5x5.cl" },
287 {
"direct_convolution_quantized",
"direct_convolution_quantized.cl" },
288 {
"elementwise_operation_ADD",
"elementwise_operation.cl" },
289 {
"elementwise_operation_SUB",
"elementwise_operation.cl" },
290 {
"elementwise_operation_MAX",
"elementwise_operation.cl" },
291 {
"elementwise_operation_MIN",
"elementwise_operation.cl" },
292 {
"elementwise_operation_DIV",
"elementwise_operation.cl" },
293 {
"elementwise_operation_SQUARED_DIFF",
"elementwise_operation.cl" },
294 {
"elementwise_operation_POWER",
"elementwise_operation.cl" },
295 {
"elementwise_operation_PRELU",
"elementwise_operation.cl" },
296 {
"elementwise_operation_AND",
"elementwise_operation.cl" },
297 {
"elementwise_operation_OR",
"elementwise_operation.cl" },
298 {
"elementwise_operation_ADD_quantized",
"elementwise_operation_quantized.cl" },
299 {
"elementwise_operation_SUB_quantized",
"elementwise_operation_quantized.cl" },
300 {
"elementwise_operation_MAX_quantized",
"elementwise_operation_quantized.cl" },
301 {
"elementwise_operation_MIN_quantized",
"elementwise_operation_quantized.cl" },
302 {
"elementwise_operation_DIV_quantized",
"elementwise_operation_quantized.cl" },
303 {
"elementwise_operation_SQUARED_DIFF_quantized",
"elementwise_operation_quantized.cl" },
304 {
"elementwise_operation_PRELU_quantized",
"elementwise_operation_quantized.cl" },
305 {
"elementwise_unary",
"elementwise_unary.cl" },
306 {
"erode",
"erode.cl" },
307 {
"fast_corners",
"fast_corners.cl" },
308 {
"fft_digit_reverse_axis_0",
"fft_digit_reverse.cl" },
309 {
"fft_digit_reverse_axis_1",
"fft_digit_reverse.cl" },
310 {
"fft_radix_2_first_stage_axis_0",
"fft.cl" },
311 {
"fft_radix_2_first_stage_axis_1",
"fft.cl" },
312 {
"fft_radix_2_axis_0",
"fft.cl" },
313 {
"fft_radix_2_axis_1",
"fft.cl" },
314 {
"fft_radix_3_first_stage_axis_0",
"fft.cl" },
315 {
"fft_radix_3_first_stage_axis_1",
"fft.cl" },
316 {
"fft_radix_3_axis_0",
"fft.cl" },
317 {
"fft_radix_3_axis_1",
"fft.cl" },
318 {
"fft_radix_4_first_stage_axis_0",
"fft.cl" },
319 {
"fft_radix_4_first_stage_axis_1",
"fft.cl" },
320 {
"fft_radix_4_axis_0",
"fft.cl" },
321 {
"fft_radix_4_axis_1",
"fft.cl" },
322 {
"fft_radix_5_first_stage_axis_0",
"fft.cl" },
323 {
"fft_radix_5_first_stage_axis_1",
"fft.cl" },
324 {
"fft_radix_5_axis_0",
"fft.cl" },
325 {
"fft_radix_5_axis_1",
"fft.cl" },
326 {
"fft_radix_7_first_stage_axis_0",
"fft.cl" },
327 {
"fft_radix_7_first_stage_axis_1",
"fft.cl" },
328 {
"fft_radix_7_axis_0",
"fft.cl" },
329 {
"fft_radix_7_axis_1",
"fft.cl" },
330 {
"fft_radix_8_first_stage_axis_0",
"fft.cl" },
331 {
"fft_radix_8_first_stage_axis_1",
"fft.cl" },
332 {
"fft_radix_8_axis_0",
"fft.cl" },
333 {
"fft_radix_8_axis_1",
"fft.cl" },
334 {
"fft_scale_conj",
"fft_scale.cl" },
335 {
"fill_image_borders_constant",
"fill_border.cl" },
336 {
"fill_image_borders_replicate",
"fill_border.cl" },
337 {
"finalize",
"optical_flow_pyramid_lk.cl" },
338 {
"floor_layer",
"floor.cl" },
339 {
"fuse_batchnormalization_layer",
"batchnormalization_layer.cl" },
340 {
"gather",
"gather.cl" },
341 {
"gaussian1x5_sub_x",
"gaussian_pyramid.cl" },
342 {
"gaussian5x1_sub_y",
"gaussian_pyramid.cl" },
343 {
"gemm_ma_f16",
"gemm.cl" },
344 {
"gemm_ma_f32",
"gemm.cl" },
345 {
"gemm_mv",
"gemv.cl" },
346 {
"gemm_mv_quantized",
"gemv.cl" },
347 {
"gemm_mm_interleaved_transposed_f16",
"gemm_v1.cl" },
348 {
"gemm_mm_interleaved_transposed_f16_acc32",
"gemm_v1.cl" },
349 {
"gemm_mm_interleaved_transposed_f16_bifrost",
"gemm_v1.cl" },
350 {
"gemm_mm_interleaved_transposed_f32",
"gemm_v1.cl" },
351 {
"gemm_mm_interleaved_transposed_f32_bifrost",
"gemm_v1.cl" },
352 {
"gemm_mm_floating_point",
"gemm_v1.cl" },
353 {
"gemm_mm_floating_point_f16_bifrost",
"gemm_v1.cl" },
354 {
"gemm_mm_floating_point_f16_bifrost_acc32",
"gemm_v1.cl" },
355 {
"gemm_mm_floating_point_f32_bifrost",
"gemm_v1.cl" },
356 {
"gemm_mm_floating_point_f32_bifrost_1000",
"gemm_v1.cl" },
357 {
"gemm_mm_native",
"gemm.cl" },
358 {
"gemm_mm_reshaped_lhs_nt_rhs_t",
"gemm.cl" },
359 {
"gemm_mm_reshaped_lhs_nt_rhs_t_texture",
"gemm.cl" },
360 {
"gemm_mm_reshaped_lhs_t_rhs_nt",
"gemm.cl" },
361 {
"gemm_mm_reshaped_lhs_t_rhs_nt_texture",
"gemm.cl" },
362 {
"gemm_mm_reshaped_only_rhs_nt",
"gemm.cl" },
363 {
"gemm_mm_reshaped_only_rhs_nt_texture",
"gemm.cl" },
364 {
"gemm_mm_reshaped_only_rhs_t",
"gemm.cl" },
365 {
"gemm_mm_reshaped_only_rhs_t_texture",
"gemm.cl" },
366 {
"gemm_lc_vm_f32",
"gemm.cl" },
367 {
"gemm_reshape_lhs_matrix_nt",
"gemm.cl" },
368 {
"gemm_reshape_lhs_matrix_t",
"gemm.cl" },
369 {
"gemm_reshape_rhs_matrix_nt",
"gemm.cl" },
370 {
"gemm_reshape_rhs_matrix_t",
"gemm.cl" },
371 {
"gemmlowp_matrix_a_reduction",
"gemmlowp.cl" },
372 {
"gemmlowp_matrix_a_reduction_dot8",
"gemmlowp.cl" },
373 {
"gemmlowp_matrix_b_reduction",
"gemmlowp.cl" },
374 {
"gemmlowp_mm_native",
"gemmlowp.cl" },
375 {
"gemmlowp_mm_reshaped_lhs_nt_rhs_t",
"gemmlowp.cl" },
376 {
"gemmlowp_mm_reshaped_only_rhs_t",
"gemmlowp.cl" },
377 {
"gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint",
"gemmlowp.cl" },
378 {
"gemmlowp_offset_contribution",
"gemmlowp.cl" },
379 {
"gemmlowp_offset_contribution_quantize_down",
"gemmlowp.cl" },
380 {
"gemmlowp_offset_contribution_quantize_down_fixedpoint",
"gemmlowp.cl" },
381 {
"gemmlowp_output_stage_quantize_down",
"gemmlowp.cl" },
382 {
"gemmlowp_output_stage_quantize_down_fixedpoint",
"gemmlowp.cl" },
383 {
"gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16",
"gemmlowp.cl" },
384 {
"gemmlowp_output_stage_quantize_down_float",
"gemmlowp.cl" },
385 {
"generate_proposals_compute_all_anchors",
"generate_proposals.cl" },
386 {
"generate_proposals_compute_all_anchors_quantized",
"generate_proposals_quantized.cl" },
387 {
"harris_score_3x3",
"harris_corners.cl" },
388 {
"harris_score_5x5",
"harris_corners.cl" },
389 {
"harris_score_7x7",
"harris_corners.cl" },
390 {
"hist_border_kernel",
"histogram.cl" },
391 {
"hist_border_kernel_fixed",
"histogram.cl" },
392 {
"hist_local_kernel",
"histogram.cl" },
393 {
"hist_local_kernel_fixed",
"histogram.cl" },
394 {
"hog_block_normalization",
"hog.cl" },
395 {
"hog_detector",
"hog.cl" },
396 {
"hog_orientation_binning",
"hog.cl" },
397 {
"hysteresis",
"canny.cl" },
398 {
"im2col1x1_stridex1_nchw",
"im2col.cl" },
399 {
"im2col3x3_nchw",
"im2col.cl" },
400 {
"im2col5x5_nchw",
"im2col.cl" },
401 {
"im2col11x11_padx0_pady0_nchw",
"im2col.cl" },
402 {
"im2col_generic_nchw",
"im2col.cl" },
403 {
"im2col_generic_padx0_pady0_nchw",
"im2col.cl" },
404 {
"im2col3x3_nhwc",
"im2col.cl" },
405 {
"im2col9x9_nhwc",
"im2col.cl" },
406 {
"im2col_generic_nhwc",
"im2col.cl" },
407 {
"init_level",
"optical_flow_pyramid_lk.cl" },
408 {
"init_level_max",
"optical_flow_pyramid_lk.cl" },
409 {
"init_level_max_initial_estimate",
"optical_flow_pyramid_lk.cl" },
410 {
"instance_normalization",
"instance_normalization.cl" },
411 {
"integral_horizontal",
"integral_image.cl" },
412 {
"integral_vertical",
"integral_image.cl" },
413 {
"IYUV_to_NV12_bt709",
"color_convert.cl" },
414 {
"IYUV_to_RGB888_bt709",
"color_convert.cl" },
415 {
"IYUV_to_RGBA8888_bt709",
"color_convert.cl" },
416 {
"IYUV_to_YUV444_bt709",
"color_convert.cl" },
417 {
"l2_normalize_x",
"l2_normalize.cl" },
418 {
"l2_normalize_y",
"l2_normalize.cl" },
419 {
"l2_normalize_z",
"l2_normalize.cl" },
420 {
"lktracker_stage0",
"optical_flow_pyramid_lk.cl" },
421 {
"lktracker_stage1",
"optical_flow_pyramid_lk.cl" },
422 {
"magnitude_phase",
"magnitude_phase.cl" },
423 {
"max_unpooling_layer_2",
"unpooling_layer.cl" },
424 {
"mean_stddev_accumulate",
"mean_stddev.cl" },
425 {
"mean_stddev_normalization",
"mean_stddev_normalization.cl" },
426 {
"memset",
"memset.cl" },
427 {
"minmax",
"minmaxloc.cl" },
428 {
"minmax_border",
"minmaxloc.cl" },
429 {
"minmax_layer",
"minmax_layer.cl" },
430 {
"minmaxloc",
"minmaxloc.cl" },
431 {
"non_linear_filter_box3x3",
"non_linear_filter3x3.cl" },
432 {
"non_linear_filter_cross3x3",
"non_linear_filter3x3.cl" },
433 {
"non_linear_filter_disk3x3",
"non_linear_filter3x3.cl" },
434 {
"non_linear_filter_box5x5",
"non_linear_filter5x5.cl" },
435 {
"non_linear_filter_cross5x5",
"non_linear_filter5x5.cl" },
436 {
"non_linear_filter_disk5x5",
"non_linear_filter5x5.cl" },
437 {
"non_max_suppression",
"nonmax.cl" },
438 {
"normalization_layer_cross_map",
"normalization_layer.cl" },
439 {
"normalization_layer_in_map_nchw",
"normalization_layer.cl" },
440 {
"normalization_layer_in_map_nhwc",
"normalization_layer.cl" },
441 {
"normalize_planar_yuv_layer_nchw",
"normalize_planar_yuv_layer.cl" },
442 {
"normalize_planar_yuv_layer_nhwc",
"normalize_planar_yuv_layer.cl" },
443 {
"normalize_planar_yuv_layer_q8_nchw",
"normalize_planar_yuv_layer_quantized.cl" },
444 {
"normalize_planar_yuv_layer_q8_nhwc",
"normalize_planar_yuv_layer_quantized.cl" },
445 {
"NV12_to_IYUV_bt709",
"color_convert.cl" },
446 {
"NV12_to_RGB888_bt709",
"color_convert.cl" },
447 {
"NV12_to_RGBA8888_bt709",
"color_convert.cl" },
448 {
"NV12_to_YUV444_bt709",
"color_convert.cl" },
449 {
"NV21_to_IYUV_bt709",
"color_convert.cl" },
450 {
"NV21_to_RGB888_bt709",
"color_convert.cl" },
451 {
"NV21_to_RGBA8888_bt709",
"color_convert.cl" },
452 {
"NV21_to_YUV444_bt709",
"color_convert.cl" },
453 {
"pad_layer_constant",
"pad_layer.cl" },
454 {
"pad_layer_symmetric_reflect",
"pad_layer.cl" },
455 {
"permute",
"permute.cl" },
456 {
"pixelwise_mul_complex",
"pixelwise_mul_float.cl" },
457 {
"pixelwise_mul_float",
"pixelwise_mul_float.cl" },
458 {
"pixelwise_mul_int",
"pixelwise_mul_int.cl" },
459 {
"pixelwise_mul_quantized",
"pixelwise_mul_int.cl" },
460 {
"pooling_layer_2",
"pooling_layer.cl" },
461 {
"pooling_layer_3",
"pooling_layer.cl" },
462 {
"pooling_layer_optimized_3",
"pooling_layer.cl" },
463 {
"pooling_layer_7",
"pooling_layer.cl" },
464 {
"pooling_layer_MxN_nchw",
"pooling_layer.cl" },
465 {
"pooling_layer_MxN_nhwc",
"pooling_layer.cl" },
466 {
"pooling_layer_2x2_nhwc",
"pooling_layer.cl" },
467 {
"pooling_layer_2_nchw_indices_fp32",
"pooling_layer.cl" },
468 {
"pooling_layer_2_nchw_indices_fp16",
"pooling_layer.cl" },
469 {
"pooling_layer_MxN_quantized_nhwc",
"pooling_layer_quantized.cl" },
470 {
"pooling_layer_MxN_quantized_nchw",
"pooling_layer_quantized.cl" },
471 {
"prior_box_layer_nchw",
"prior_box_layer.cl" },
472 {
"qlstm_layer_normalization",
"qlstm_layer_normalization.cl" },
473 {
"quantization_layer",
"quantization_layer.cl" },
474 {
"range",
"range.cl" },
475 {
"range_quantized",
"range.cl" },
476 {
"reduction_operation_x",
"reduction_operation.cl" },
477 {
"reduction_operation_non_parallel_x",
"reduction_operation.cl" },
478 {
"reduction_operation_y",
"reduction_operation.cl" },
479 {
"reduction_operation_z",
"reduction_operation.cl" },
480 {
"reduction_operation_w",
"reduction_operation.cl" },
481 {
"remap_nearest_neighbour",
"remap.cl" },
482 {
"remap_bilinear",
"remap.cl" },
483 {
"reorg_layer_nchw",
"reorg_layer.cl" },
484 {
"reorg_layer_nhwc",
"reorg_layer.cl" },
485 {
"reshape_layer",
"reshape_layer.cl" },
486 {
"reshape_to_columns",
"convolution_layer.cl" },
487 {
"reverse",
"reverse.cl" },
488 {
"RGB888_to_IYUV_bt709",
"color_convert.cl" },
489 {
"RGB888_to_NV12_bt709",
"color_convert.cl" },
490 {
"RGB888_to_RGBA8888_bt709",
"color_convert.cl" },
491 {
"RGB888_to_U8_bt709",
"color_convert.cl" },
492 {
"RGB888_to_YUV444_bt709",
"color_convert.cl" },
493 {
"RGBA8888_to_IYUV_bt709",
"color_convert.cl" },
494 {
"RGBA8888_to_NV12_bt709",
"color_convert.cl" },
495 {
"RGBA8888_to_RGB888_bt709",
"color_convert.cl" },
496 {
"RGBA8888_to_YUV444_bt709",
"color_convert.cl" },
497 {
"roi_align_layer",
"roi_align_layer.cl" },
498 {
"roi_align_layer_quantized",
"roi_align_layer_quantized.cl" },
499 {
"roi_pooling_layer",
"roi_pooling_layer.cl" },
500 {
"scale_nearest_neighbour_nchw",
"scale.cl" },
501 {
"scale_nearest_neighbour_nhwc",
"scale.cl" },
502 {
"scale_bilinear_nchw",
"scale.cl" },
503 {
"scale_bilinear_nhwc",
"scale.cl" },
504 {
"scale_bilinear_quantized_nchw",
"scale_quantized.cl" },
505 {
"scale_bilinear_quantized_nhwc",
"scale_quantized.cl" },
506 {
"scharr3x3",
"scharr_filter.cl" },
507 {
"select_same_rank",
"select.cl" },
508 {
"select_different_rank_2",
"select.cl" },
509 {
"select_different_rank_n",
"select.cl" },
510 {
"sobel3x3",
"sobel_filter.cl" },
511 {
"sobel_separable5x1",
"sobel_filter.cl" },
512 {
"sobel_separable1x5",
"sobel_filter.cl" },
513 {
"sobel_separable7x1",
"sobel_filter.cl" },
514 {
"sobel_separable1x7",
"sobel_filter.cl" },
515 {
"softmax_layer_norm",
"softmax_layer.cl" },
516 {
"softmax_layer_norm_quantized",
"softmax_layer_quantized.cl" },
517 {
"softmax_layer_max_shift_exp_sum_quantized_serial",
"softmax_layer_quantized.cl" },
518 {
"softmax_layer_max_shift_exp_sum_quantized_parallel",
"softmax_layer_quantized.cl" },
519 {
"softmax_layer_max_shift_exp_sum_serial",
"softmax_layer.cl" },
520 {
"space_to_batch_nchw",
"space_to_batch.cl" },
521 {
"space_to_batch_static_nchw",
"space_to_batch.cl" },
522 {
"space_to_batch_nhwc",
"space_to_batch.cl" },
523 {
"space_to_batch_static_nhwc",
"space_to_batch.cl" },
524 {
"space_to_depth_nchw",
"space_to_depth.cl" },
525 {
"space_to_depth_nhwc",
"space_to_depth.cl" },
526 {
"softmax_layer_max_shift_exp_sum_parallel",
"softmax_layer.cl" },
527 {
"stack_layer",
"stack_layer.cl" },
528 {
"strided_slice",
"slice_ops.cl" },
529 {
"suppress_non_maximum",
"canny.cl" },
530 {
"tablelookup_U8",
"tablelookup.cl" },
531 {
"tablelookup_S16",
"tablelookup.cl" },
532 {
"threshold_binary",
"threshold.cl" },
533 {
"threshold_range",
"threshold.cl" },
534 {
"tile",
"tile.cl" },
535 {
"transpose",
"transpose.cl" },
536 {
"UYVY422_to_IYUV_bt709",
"color_convert.cl" },
537 {
"UYVY422_to_NV12_bt709",
"color_convert.cl" },
538 {
"UYVY422_to_RGB888_bt709",
"color_convert.cl" },
539 {
"UYVY422_to_RGBA8888_bt709",
"color_convert.cl" },
540 {
"upsample_layer_nchw",
"upsample_layer.cl" },
541 {
"upsample_layer_nhwc",
"upsample_layer.cl" },
542 {
"warp_affine_nearest_neighbour",
"warp_affine.cl" },
543 {
"warp_affine_bilinear",
"warp_affine.cl" },
544 {
"warp_perspective_nearest_neighbour",
"warp_perspective.cl" },
545 {
"warp_perspective_bilinear",
"warp_perspective.cl" },
546 {
"winograd_filter_transform_2x2_3x3_nchw",
"winograd_filter_transform.cl" },
547 {
"winograd_filter_transform_2x1_3x1_nchw",
"winograd_filter_transform.cl" },
548 {
"winograd_filter_transform_1x2_1x3_nchw",
"winograd_filter_transform.cl" },
549 {
"winograd_filter_transform_4x4_3x3_nchw",
"winograd_filter_transform.cl" },
550 {
"winograd_filter_transform_4x1_3x1_nchw",
"winograd_filter_transform.cl" },
551 {
"winograd_filter_transform_1x4_1x3_nchw",
"winograd_filter_transform.cl" },
552 {
"winograd_filter_transform_4x4_5x5_nchw",
"winograd_filter_transform.cl" },
553 {
"winograd_filter_transform_4x1_5x1_nchw",
"winograd_filter_transform.cl" },
554 {
"winograd_filter_transform_1x4_1x5_nchw",
"winograd_filter_transform.cl" },
555 {
"winograd_filter_transform_4x1_3x1_nhwc",
"winograd_filter_transform.cl" },
556 {
"winograd_filter_transform_1x4_1x3_nhwc",
"winograd_filter_transform.cl" },
557 {
"winograd_filter_transform_4x4_3x3_nhwc",
"winograd_filter_transform.cl" },
558 {
"winograd_filter_transform_4x4_5x5_nhwc",
"winograd_filter_transform.cl" },
559 {
"winograd_filter_transform_4x1_5x1_nhwc",
"winograd_filter_transform.cl" },
560 {
"winograd_filter_transform_1x4_1x5_nhwc",
"winograd_filter_transform.cl" },
561 {
"winograd_filter_transform_2x2_7x7_nhwc",
"winograd_filter_transform.cl" },
562 {
"winograd_filter_transform_2x1_7x1_nhwc",
"winograd_filter_transform.cl" },
563 {
"winograd_filter_transform_1x2_1x7_nhwc",
"winograd_filter_transform.cl" },
564 {
"winograd_input_transform_2x2_3x3_stepz1_nchw",
"winograd_input_transform.cl" },
565 {
"winograd_input_transform_2x2_3x3_stepz2_nchw",
"winograd_input_transform.cl" },
566 {
"winograd_input_transform_2x1_3x1_stepz1_nchw",
"winograd_input_transform.cl" },
567 {
"winograd_input_transform_2x1_3x1_stepz2_nchw",
"winograd_input_transform.cl" },
568 {
"winograd_input_transform_1x2_1x3_stepz1_nchw",
"winograd_input_transform.cl" },
569 {
"winograd_input_transform_1x2_1x3_stepz2_nchw",
"winograd_input_transform.cl" },
570 {
"winograd_input_transform_4x4_3x3_stepz1_nchw",
"winograd_input_transform.cl" },
571 {
"winograd_input_transform_4x1_3x1_stepz1_nchw",
"winograd_input_transform.cl" },
572 {
"winograd_input_transform_1x4_1x3_stepz1_nchw",
"winograd_input_transform.cl" },
573 {
"winograd_input_transform_4x4_5x5_stepz1_nchw",
"winograd_input_transform.cl" },
574 {
"winograd_input_transform_4x1_5x1_stepz1_nchw",
"winograd_input_transform.cl" },
575 {
"winograd_input_transform_1x4_1x5_stepz1_nchw",
"winograd_input_transform.cl" },
576 {
"winograd_input_transform_4x1_3x1_stepz1_nhwc",
"winograd_input_transform.cl" },
577 {
"winograd_input_transform_1x4_1x3_stepz1_nhwc",
"winograd_input_transform.cl" },
578 {
"winograd_input_transform_4x4_3x3_stepz1_nhwc",
"winograd_input_transform.cl" },
579 {
"winograd_input_transform_4x4_5x5_stepz1_nhwc",
"winograd_input_transform.cl" },
580 {
"winograd_input_transform_4x1_5x1_stepz1_nhwc",
"winograd_input_transform.cl" },
581 {
"winograd_input_transform_1x4_1x5_stepz1_nhwc",
"winograd_input_transform.cl" },
582 {
"winograd_input_transform_2x2_7x7_stepz1_nhwc",
"winograd_input_transform.cl" },
583 {
"winograd_input_transform_2x1_7x1_stepz1_nhwc",
"winograd_input_transform.cl" },
584 {
"winograd_input_transform_1x2_1x7_stepz1_nhwc",
"winograd_input_transform.cl" },
585 {
"winograd_output_transform_2x2_3x3_nchw",
"winograd_output_transform.cl" },
586 {
"winograd_output_transform_2x1_3x1_nchw",
"winograd_output_transform.cl" },
587 {
"winograd_output_transform_1x2_1x3_nchw",
"winograd_output_transform.cl" },
588 {
"winograd_output_transform_4x4_3x3_nchw",
"winograd_output_transform.cl" },
589 {
"winograd_output_transform_4x1_3x1_nchw",
"winograd_output_transform.cl" },
590 {
"winograd_output_transform_1x4_1x3_nchw",
"winograd_output_transform.cl" },
591 {
"winograd_output_transform_4x4_5x5_nchw",
"winograd_output_transform.cl" },
592 {
"winograd_output_transform_4x1_5x1_nchw",
"winograd_output_transform.cl" },
593 {
"winograd_output_transform_1x4_1x5_nchw",
"winograd_output_transform.cl" },
594 {
"winograd_output_transform_4x1_3x1_nhwc",
"winograd_output_transform.cl" },
595 {
"winograd_output_transform_1x4_1x3_nhwc",
"winograd_output_transform.cl" },
596 {
"winograd_output_transform_4x4_3x3_nhwc",
"winograd_output_transform.cl" },
597 {
"winograd_output_transform_4x4_5x5_nhwc",
"winograd_output_transform.cl" },
598 {
"winograd_output_transform_4x1_5x1_nhwc",
"winograd_output_transform.cl" },
599 {
"winograd_output_transform_1x4_1x5_nhwc",
"winograd_output_transform.cl" },
600 {
"winograd_output_transform_2x2_7x7_nhwc",
"winograd_output_transform.cl" },
601 {
"winograd_output_transform_2x1_7x1_nhwc",
"winograd_output_transform.cl" },
602 {
"winograd_output_transform_1x2_1x7_nhwc",
"winograd_output_transform.cl" },
603 {
"yolo_layer_nchw",
"yolo_layer.cl" },
604 {
"yolo_layer_nhwc",
"yolo_layer.cl" },
605 {
"YUYV422_to_IYUV_bt709",
"color_convert.cl" },
606 {
"YUYV422_to_NV12_bt709",
"color_convert.cl" },
607 {
"YUYV422_to_RGB888_bt709",
"color_convert.cl" },
608 {
"YUYV422_to_RGBA8888_bt709",
"color_convert.cl" },
611 const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
613 #ifdef EMBEDDED_KERNELS 616 #include "./cl_kernels/absdiff.clembed" 620 #include "./cl_kernels/accumulate.clembed" 623 "activation_layer.cl",
624 #include "./cl_kernels/activation_layer.clembed" 627 "activation_layer_quant.cl",
628 #include "./cl_kernels/activation_layer_quant.clembed" 632 #include "./cl_kernels/arg_min_max.clembed" 636 #include "./cl_kernels/batch_to_space.clembed" 640 #include "./cl_kernels/bitwise_op.clembed" 643 "bounding_box_transform.cl",
644 #include "./cl_kernels/bounding_box_transform.clembed" 647 "bounding_box_transform_quantized.cl",
648 #include "./cl_kernels/bounding_box_transform_quantized.clembed" 652 #include "./cl_kernels/canny.clembed" 655 "channel_combine.cl",
656 #include "./cl_kernels/channel_combine.clembed" 659 "channel_extract.cl",
660 #include "./cl_kernels/channel_extract.clembed" 663 "channel_shuffle.cl",
664 #include "./cl_kernels/channel_shuffle.clembed" 668 #include "./cl_kernels/col2im.clembed" 672 #include "./cl_kernels/comparisons.clembed" 676 #include "./cl_kernels/concatenate.clembed" 680 #include "./cl_kernels/color_convert.clembed" 683 "convert_fc_weights.cl",
684 #include "./cl_kernels/convert_fc_weights.clembed" 688 #include "./cl_kernels/convolution3x3.clembed" 692 #include "./cl_kernels/convolution5x5.clembed" 696 #include "./cl_kernels/convolution7x7.clembed" 700 #include "./cl_kernels/convolution9x9.clembed" 703 "convolution_layer.cl",
704 #include "./cl_kernels/convolution_layer.clembed" 707 "convolution_rectangle.cl",
708 #include "./cl_kernels/convolution_rectangle.clembed" 712 #include "./cl_kernels/copy_tensor.clembed" 716 #include "./cl_kernels/crop_tensor.clembed" 720 #include "./cl_kernels/upsample_layer.clembed" 723 "deconvolution_layer.cl",
724 #include "./cl_kernels/deconvolution_layer.clembed" 728 #include "./cl_kernels/depth_convert.clembed" 732 #include "./cl_kernels/depth_to_space.clembed" 735 "depthwise_convolution.cl",
736 #include "./cl_kernels/depthwise_convolution.clembed" 739 "depthwise_convolution_quantized.cl",
740 #include "./cl_kernels/depthwise_convolution_quantized.clembed" 743 "dequantization_layer.cl",
744 #include "./cl_kernels/dequantization_layer.clembed" 748 #include "./cl_kernels/derivative.clembed" 752 #include "./cl_kernels/dilate.clembed" 755 "direct_convolution1x1.cl",
756 #include "./cl_kernels/direct_convolution1x1.clembed" 759 "direct_convolution3x3.cl",
760 #include "./cl_kernels/direct_convolution3x3.clembed" 763 "direct_convolution5x5.cl",
764 #include "./cl_kernels/direct_convolution5x5.clembed" 767 "direct_convolution_quantized.cl",
768 #include "./cl_kernels/direct_convolution_quantized.clembed" 771 "direct_convolution.cl",
772 #include "./cl_kernels/direct_convolution.clembed" 775 "elementwise_operation.cl",
776 #include "./cl_kernels/elementwise_operation.clembed" 779 "elementwise_operation_quantized.cl",
780 #include "./cl_kernels/elementwise_operation_quantized.clembed" 783 "elementwise_unary.cl",
784 #include "./cl_kernels/elementwise_unary.clembed" 788 #include "./cl_kernels/erode.clembed" 792 #include "./cl_kernels/fast_corners.clembed" 796 #include "./cl_kernels/fft.clembed" 799 "fft_digit_reverse.cl",
800 #include "./cl_kernels/fft_digit_reverse.clembed" 804 #include "./cl_kernels/fft_scale.clembed" 808 #include "./cl_kernels/fill_border.clembed" 812 #include "./cl_kernels/floor.clembed" 816 #include "./cl_kernels/gather.clembed" 819 "gaussian_pyramid.cl",
820 #include "./cl_kernels/gaussian_pyramid.clembed" 824 #include "./cl_kernels/gemm.clembed" 828 #include "./cl_kernels/gemm_v1.clembed" 832 #include "./cl_kernels/gemmlowp.clembed" 836 #include "./cl_kernels/gemv.clembed" 839 "generate_proposals.cl",
840 #include "./cl_kernels/generate_proposals.clembed" 843 "generate_proposals_quantized.cl",
844 #include "./cl_kernels/generate_proposals_quantized.clembed" 848 #include "./cl_kernels/harris_corners.clembed" 852 #include "./cl_kernels/helpers.hembed" 856 #include "./cl_kernels/helpers_asymm.hembed" 860 #include "./cl_kernels/histogram.clembed" 864 #include "./cl_kernels/hog.clembed" 868 #include "./cl_kernels/im2col.clembed" 871 "instance_normalization.cl",
872 #include "./cl_kernels/instance_normalization.clembed" 876 #include "./cl_kernels/integral_image.clembed" 880 #include "./cl_kernels/l2_normalize.clembed" 883 "magnitude_phase.cl",
884 #include "./cl_kernels/magnitude_phase.clembed" 888 #include "./cl_kernels/mean_stddev.clembed" 891 "mean_stddev_normalization.cl",
892 #include "./cl_kernels/mean_stddev_normalization.clembed" 896 #include "./cl_kernels/memset.clembed" 900 #include "./cl_kernels/minmaxloc.clembed" 904 #include "./cl_kernels/minmax_layer.clembed" 907 "non_linear_filter3x3.cl",
908 #include "./cl_kernels/non_linear_filter3x3.clembed" 911 "non_linear_filter5x5.cl",
912 #include "./cl_kernels/non_linear_filter5x5.clembed" 915 "non_linear_filter_helpers.h",
916 #include "./cl_kernels/non_linear_filter_helpers.hembed" 920 #include "./cl_kernels/nonmax.clembed" 923 "normalization_layer.cl",
924 #include "./cl_kernels/normalization_layer.clembed" 927 "normalize_planar_yuv_layer.cl",
928 #include "./cl_kernels/normalize_planar_yuv_layer.clembed" 931 "normalize_planar_yuv_layer_quantized.cl",
932 #include "./cl_kernels/normalize_planar_yuv_layer_quantized.clembed" 935 "batchnormalization_layer.cl",
936 #include "./cl_kernels/batchnormalization_layer.clembed" 939 "optical_flow_pyramid_lk.cl",
940 #include "./cl_kernels/optical_flow_pyramid_lk.clembed" 944 #include "./cl_kernels/pad_layer.clembed" 948 #include "./cl_kernels/permute.clembed" 951 "pixelwise_mul_float.cl",
952 #include "./cl_kernels/pixelwise_mul_float.clembed" 955 "pixelwise_mul_int.cl",
956 #include "./cl_kernels/pixelwise_mul_int.clembed" 960 #include "./cl_kernels/pooling_layer.clembed" 963 "pooling_layer_quantized.cl",
964 #include "./cl_kernels/pooling_layer_quantized.clembed" 967 "prior_box_layer.cl",
968 #include "./cl_kernels/prior_box_layer.clembed" 971 "qlstm_layer_normalization.cl",
972 #include "./cl_kernels/qlstm_layer_normalization.clembed" 975 "quantization_layer.cl",
976 #include "./cl_kernels/quantization_layer.clembed" 980 #include "./cl_kernels/range.clembed" 983 "reduction_operation.cl",
984 #include "./cl_kernels/reduction_operation.clembed" 988 #include "./cl_kernels/remap.clembed" 992 #include "./cl_kernels/reorg_layer.clembed" 996 #include "./cl_kernels/reshape_layer.clembed" 1000 #include "./cl_kernels/reverse.clembed" 1003 "roi_align_layer.cl",
1004 #include "./cl_kernels/roi_align_layer.clembed" 1007 "roi_align_layer_quantized.cl",
1008 #include "./cl_kernels/roi_align_layer_quantized.clembed" 1011 "roi_pooling_layer.cl",
1012 #include "./cl_kernels/roi_pooling_layer.clembed" 1016 #include "./cl_kernels/scale.clembed" 1019 "scale_quantized.cl",
1020 #include "./cl_kernels/scale_quantized.clembed" 1024 #include "./cl_kernels/scharr_filter.clembed" 1028 #include "./cl_kernels/select.clembed" 1032 #include "./cl_kernels/sobel_filter.clembed" 1036 #include "./cl_kernels/softmax_layer.clembed" 1039 "softmax_layer_quantized.cl",
1040 #include "./cl_kernels/softmax_layer_quantized.clembed" 1044 #include "./cl_kernels/slice_ops.clembed" 1047 "space_to_batch.cl",
1048 #include "./cl_kernels/space_to_batch.clembed" 1051 "space_to_depth.cl",
1052 #include "./cl_kernels/space_to_depth.clembed" 1056 #include "./cl_kernels/stack_layer.clembed" 1060 #include "./cl_kernels/tablelookup.clembed" 1064 #include "./cl_kernels/threshold.clembed" 1068 #include "./cl_kernels/tile.clembed" 1072 #include "./cl_kernels/transpose.clembed" 1076 #include "./cl_kernels/types.hembed" 1079 "unpooling_layer.cl",
1080 #include "./cl_kernels/unpooling_layer.clembed" 1084 #include "./cl_kernels/warp_affine.clembed" 1088 #include "./cl_kernels/warp_helpers.hembed" 1091 "warp_perspective.cl",
1092 #include "./cl_kernels/warp_perspective.clembed" 1095 "winograd_filter_transform.cl",
1096 #include "./cl_kernels/winograd_filter_transform.clembed" 1099 "winograd_input_transform.cl",
1100 #include "./cl_kernels/winograd_input_transform.clembed" 1103 "winograd_output_transform.cl",
1104 #include "./cl_kernels/winograd_output_transform.clembed" 1108 #include "./cl_kernels/yolo_layer.clembed" 1113 CLKernelLibrary::CLKernelLibrary()
1114 : _compile_context(), _kernel_path(), _decompressed_source_map()
1122 return _kernel_library;
1130 return _compile_context.
create_kernel(kernel_name, program_name, program.first, _kernel_path, build_options_set, program.second);
1136 auto kernel_program_it = _kernel_program_map.find(kernel_name);
1138 if(_kernel_program_map.end() == kernel_program_it)
1143 const std::string program_name = kernel_program_it->second;
1145 return program_name;
1151 _kernel_path = kernel_path;
1156 _kernel_path = std::move(kernel_path);
1161 return _compile_context.
context();
1181 return _kernel_path;
1216 #ifdef EMBEDDED_KERNELS 1217 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 1218 const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
1219 if(inflatted_program_source_it != _decompressed_source_map.end())
1221 return std::make_pair(inflatted_program_source_it->second,
false);
1225 const auto program_source_it = _program_source_map.find(program_name);
1226 if(program_source_it == _program_source_map.end())
1230 std::string program_source = program_source_it->second;
1232 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 1233 std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
1235 _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
1236 program_source = std::move(decompressed_program_source);
1239 return std::make_pair(program_source,
false);
1242 std::string source_name = _kernel_path + program_name;
1243 std::string binary_name = source_name +
"bin";
1244 std::string program_source{};
1245 bool is_binary =
false;
1247 if(std::ifstream(binary_name).is_open())
1249 program_source =
read_file(binary_name,
true);
1252 else if(std::ifstream(source_name).is_open())
1254 program_source =
read_file(source_name,
false);
1261 return std::make_pair(program_source, is_binary);
1287 return _compile_context;
void set_kernel_path(const std::string &kernel_path)
Sets the path that the kernels reside in.
void set_device(cl::Device device)
Sets the CL device for which the programs are created.
const cl::Device & get_device() const
Gets the CL device for which the programs are created.
cl::Context & context()
Accessor for the associated CL context.
bool is_wbsm_supported() const
void add_built_program(const std::string &built_program_name, const cl::Program &program)
Add a new built program to the cache.
std::string get_device_version()
Return the device version.
#define ARM_COMPUTE_ERROR_VAR(msg,...)
Print the given message then throw an std::runtime_error.
std::pair< std::string, bool > get_program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
CLCompileContext & get_compile_context()
Gets the compile context used.
void set_context(cl::Context context)
Sets the CL context used to create programs.
std::string get_device_version() const
Return the device version.
Copyright (c) 2017-2021 Arm Limited.
cl_uint get_num_compute_units()
Return the maximum number of compute units in the device.
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel...
cl::NDRange default_ndrange() const
Return the default NDRange for the device.
void clear_programs_cache()
Clear the library's cache of binary programs.
std::string read_file(const std::string &filename, bool binary)
Load an entire file in memory.
void set_device(cl::Device device)
Sets the CL device for which the programs are created.
cl_uint get_num_compute_units() const
Return the maximum number of compute units in the device.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Kernel create_kernel(const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const
Creates a kernel from the kernel library.
cl::NDRange default_ndrange() const
Return the default NDRange for the device.
void end(TokenStream &in, bool &valid)
void init(std::string kernel_path, cl::Context context, cl::Device device)
Initialises the kernel library.
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
std::string get_program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
void clear_programs_cache()
Clear the library's cache of binary programs.
bool int64_base_atomics_supported() const
Returns true if int64_base_atomics extension is supported by the CL device.
bool fp16_supported() const
Returns true if FP16 is supported by the CL device.
bool int64_base_atomics_supported() const
Returns true if int64_base_atomics extension is supported by the CL device.
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
bool fp16_supported() const
Returns true if FP16 is supported by the CL device.
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel...
std::string get_kernel_path()
Gets the path that the kernels reside in.
void add_built_program(const std::string &built_program_name, const cl::Program &program) const
Add a new built program to the cache.
void set_context(cl::Context context)
Sets the CL context used to create programs.
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.
const cl::Device & get_device()
Gets the CL device for which the programs are created.
bool opencl_is_available()
Check if OpenCL is available.
cl::Context & context()
Accessor for the associated CL context.