// Inverse base64 lookup table, indexed by byte value (256 entries, 16 rows of 16).
// Mapping visible in the rows below:
//   '+' (43) -> 62, '/' (47) -> 63, '0'-'9' (48-57) -> 52-61,
//   'A'-'Z' (65-90) -> 0-25, 'a'-'z' (97-122) -> 26-51.
// Every other byte, including the '=' padding character (61), maps to 0, so a
// character outside the base64 alphabet cannot be told apart from 'A' by this
// table alone.
34 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 40 constexpr std::array<uint8_t, 256> b64_invtab =
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Bytes 32-47: '+' at index 43 and '/' at index 47.
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63,
// Bytes 48-63: digits '0'-'9'.
45 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0,
// Bytes 64-95: upper-case letters 'A'-'Z' start at index 65.
46 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
47 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
// Bytes 96-127: lower-case letters 'a'-'z' start at index 97.
48 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
49 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0,
// Bytes 128-255: no base64 symbols, all zero.
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Decodes a base64-encoded string into raw bytes using the b64_invtab lookup table.
//
// @param str  Base64 text; '=' is recognised as the padding character.
// @return     A std::string; the decoded bytes are accumulated in dec_b64.
//
// NOTE(review): this excerpt omits several lines of the function (the
// declarations of dec_b64 and c, any input validation, the header of the
// padding == 1 branch and the return statement). The comments below describe
// only the statements that are visible.
66 std::string decode_base64(
const std::string &
str)
68 constexpr
const char pad_char =
'=';
// Number of trailing '=' characters (0, 1 or 2): each comparison contributes 0 or 1.
// NOTE(review): rbegin()[1] needs str.size() >= 2 — presumably guaranteed by a
// length check on lines not shown here; confirm.
84 std::size_t padding = (str.rbegin()[0] == pad_char) + (str.rbegin()[1] == pad_char);
// The size is narrowed from std::size_t to int for the index arithmetic below.
85 const int str_len = str.size();
// Each group of 4 input characters yields at most 3 output bytes.
90 dec_b64.reserve(((str_len / 4) * 3));
// Largest start index of a group handled by the main loop: when padding is
// present the final 4-character group is excluded and decoded separately below.
// The subtraction is evaluated in std::size_t (padding's type) and then
// narrowed back to int.
94 const int end = str_len - 4 - padding;
95 for(; c <=
end; c += 4)
// Translate four characters into their 6-bit values.
// NOTE(review): str[c] is a plain char used as an array index; where char is
// signed, a byte >= 0x80 converts to a very large unsigned index and reads
// outside the 256-entry table — confirm the input is restricted to ASCII or
// cast to unsigned char.
97 const int byte0 = b64_invtab[str[c]];
98 const int byte1 = b64_invtab[str[c + 1]];
99 const int byte2 = b64_invtab[str[c + 2]];
100 const int byte3 = b64_invtab[str[c + 3]];
// Pack 4 x 6 bits into 3 bytes; the int results are narrowed by push_back.
102 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
103 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
104 dec_b64.push_back((byte2 << 6) | (byte3));
// Final group with three data characters -> two output bytes (presumably the
// padding == 1 branch; its `if` line is not visible in this excerpt).
110 const int byte0 = b64_invtab[str[c]];
111 const int byte1 = b64_invtab[str[c + 1]];
112 const int byte2 = b64_invtab[str[c + 2]];
114 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
115 dec_b64.push_back((byte1 << 4) | (byte2 >> 2));
// Final group with two data characters -> one output byte.
117 else if(padding == 2)
119 const int byte0 = b64_invtab[str[c]];
120 const int byte1 = b64_invtab[str[c + 1]];
122 dec_b64.push_back((byte0 << 2) | (byte1 >> 4));
// Inflates zlib-compressed data held in a std::string.
//
// @param str  Compressed bytes.
// @return     The inflated data, or an empty string when inflateInit fails or
//             the stream does not finish with Z_STREAM_END.
//
// NOTE(review): this excerpt omits lines of the function (the declarations of
// ds and status, the `do` header of the loop, the final return and any
// inflateEnd call). Confirm that inflateEnd is reached on every path after a
// successful inflateInit, including the early failure return below.
134 std::string decompress_zlib(
const std::string &str)
// Initialisation failure is reported as an empty result.
138 if(inflateInit(&ds) != Z_OK)
140 return std::string();
// The whole input is handed to zlib as a single chunk. The C-style cast drops
// the constness of str.data() to fit the z_stream field.
142 ds.avail_in = str.size();
143 ds.next_in = (Bytef *)str.data();
// Fixed 16 KiB scratch buffer that receives the output of each inflate call.
147 char roll_buff[16384];
148 std::string inflated_str;
// Point the output window at the start of the scratch buffer before inflating.
151 ds.avail_out =
sizeof(roll_buff);
152 ds.next_out =
reinterpret_cast<Bytef *
>(roll_buff);
// Flush argument 0 corresponds to Z_NO_FLUSH in zlib.h.
154 status = inflate(&ds, 0);
// total_out is compared with what has already been copied; the difference is
// the number of fresh bytes sitting at the start of roll_buff.
155 if(inflated_str.size() < ds.total_out)
157 inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
// Keep going while zlib reports progress without reaching the end of the stream.
160 while(status == Z_OK);
// Any terminal status other than Z_STREAM_END is treated as a failure.
164 if(status != Z_STREAM_END)
166 return std::string();
178 const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
181 {
"activation_layer",
"common/activation_layer.cl" },
182 {
"activation_layer_quant",
"common/activation_layer_quant.cl" },
183 {
"activation_layer_quant_f32",
"common/activation_layer_quant.cl" },
184 {
"arg_min_max_x",
"common/arg_min_max.cl" },
185 {
"arg_min_max_y",
"common/arg_min_max.cl" },
186 {
"arg_min_max_z",
"common/arg_min_max.cl" },
187 {
"arg_min_max_w",
"common/arg_min_max.cl" },
188 {
"bitwise_or",
"common/bitwise_op.cl" },
189 {
"bitwise_and",
"common/bitwise_op.cl" },
190 {
"bitwise_xor",
"common/bitwise_op.cl" },
191 {
"bitwise_not",
"common/bitwise_op.cl" },
192 {
"bounding_box_transform",
"common/bounding_box_transform.cl" },
193 {
"bounding_box_transform_quantized",
"common/bounding_box_transform_quantized.cl" },
194 {
"compare_equal",
"common/comparisons.cl" },
195 {
"compare_equal_quantized",
"common/comparisons.cl" },
196 {
"compare_notequal",
"common/comparisons.cl" },
197 {
"compare_notequal_quantized",
"common/comparisons.cl" },
198 {
"compare_greater",
"common/comparisons.cl" },
199 {
"compare_greater_quantized",
"common/comparisons.cl" },
200 {
"compare_greaterequal",
"common/comparisons.cl" },
201 {
"compare_greaterequal_quantized",
"common/comparisons.cl" },
202 {
"compare_less",
"common/comparisons.cl" },
203 {
"compare_less_quantized",
"common/comparisons.cl" },
204 {
"compare_lessequal",
"common/comparisons.cl" },
205 {
"compare_lessequal_quantized",
"common/comparisons.cl" },
206 {
"concatenate",
"common/concatenate.cl" },
207 {
"concatenate_width",
"common/concatenate.cl" },
208 {
"concatenate_height",
"common/concatenate.cl" },
209 {
"concatenate_width_x2",
"common/concatenate.cl" },
210 {
"concatenate_width_x4",
"common/concatenate.cl" },
211 {
"col2im",
"common/col2im.cl" },
212 {
"cast_down",
"common/cast.cl" },
213 {
"cast_up",
"common/cast.cl" },
214 {
"convert_fc_weights",
"common/convert_fc_weights.cl" },
215 {
"copy_tensor",
"common/copy_tensor.cl" },
216 {
"crop_tensor",
"common/crop_tensor.cl" },
217 {
"deconvolution_reshape",
"common/deconvolution_layer.cl" },
218 {
"deconvolution_upsample",
"common/deconvolution_layer.cl" },
219 {
"dequantization_layer",
"common/dequantization_layer.cl" },
220 {
"elementwise_operation_ADD",
"common/elementwise_operation.cl" },
221 {
"elementwise_operation_SUB",
"common/elementwise_operation.cl" },
222 {
"elementwise_operation_MAX",
"common/elementwise_operation.cl" },
223 {
"elementwise_operation_MIN",
"common/elementwise_operation.cl" },
224 {
"elementwise_operation_DIV",
"common/elementwise_operation.cl" },
225 {
"elementwise_operation_SQUARED_DIFF",
"common/elementwise_operation.cl" },
226 {
"elementwise_operation_POWER",
"common/elementwise_operation.cl" },
227 {
"elementwise_operation_PRELU",
"common/elementwise_operation.cl" },
228 {
"elementwise_operation_AND",
"common/elementwise_operation.cl" },
229 {
"elementwise_operation_OR",
"common/elementwise_operation.cl" },
230 {
"elementwise_operation_ADD_quantized",
"common/elementwise_operation_quantized.cl" },
231 {
"elementwise_operation_SUB_quantized",
"common/elementwise_operation_quantized.cl" },
232 {
"elementwise_operation_MAX_quantized",
"common/elementwise_operation_quantized.cl" },
233 {
"elementwise_operation_MIN_quantized",
"common/elementwise_operation_quantized.cl" },
234 {
"elementwise_operation_DIV_quantized",
"common/elementwise_operation_quantized.cl" },
235 {
"elementwise_operation_SQUARED_DIFF_quantized",
"common/elementwise_operation_quantized.cl" },
236 {
"elementwise_operation_PRELU_quantized",
"common/elementwise_operation_quantized.cl" },
237 {
"elementwise_unary",
"common/elementwise_unary.cl" },
238 {
"fft_digit_reverse_axis_0",
"common/fft_digit_reverse.cl" },
239 {
"fft_digit_reverse_axis_1",
"common/fft_digit_reverse.cl" },
240 {
"fft_radix_2_first_stage_axis_0",
"common/fft.cl" },
241 {
"fft_radix_2_first_stage_axis_1",
"common/fft.cl" },
242 {
"fft_radix_2_axis_0",
"common/fft.cl" },
243 {
"fft_radix_2_axis_1",
"common/fft.cl" },
244 {
"fft_radix_3_first_stage_axis_0",
"common/fft.cl" },
245 {
"fft_radix_3_first_stage_axis_1",
"common/fft.cl" },
246 {
"fft_radix_3_axis_0",
"common/fft.cl" },
247 {
"fft_radix_3_axis_1",
"common/fft.cl" },
248 {
"fft_radix_4_first_stage_axis_0",
"common/fft.cl" },
249 {
"fft_radix_4_first_stage_axis_1",
"common/fft.cl" },
250 {
"fft_radix_4_axis_0",
"common/fft.cl" },
251 {
"fft_radix_4_axis_1",
"common/fft.cl" },
252 {
"fft_radix_5_first_stage_axis_0",
"common/fft.cl" },
253 {
"fft_radix_5_first_stage_axis_1",
"common/fft.cl" },
254 {
"fft_radix_5_axis_0",
"common/fft.cl" },
255 {
"fft_radix_5_axis_1",
"common/fft.cl" },
256 {
"fft_radix_7_first_stage_axis_0",
"common/fft.cl" },
257 {
"fft_radix_7_first_stage_axis_1",
"common/fft.cl" },
258 {
"fft_radix_7_axis_0",
"common/fft.cl" },
259 {
"fft_radix_7_axis_1",
"common/fft.cl" },
260 {
"fft_radix_8_first_stage_axis_0",
"common/fft.cl" },
261 {
"fft_radix_8_first_stage_axis_1",
"common/fft.cl" },
262 {
"fft_radix_8_axis_0",
"common/fft.cl" },
263 {
"fft_radix_8_axis_1",
"common/fft.cl" },
264 {
"fft_scale_conj",
"common/fft_scale.cl" },
265 {
"fill_image_borders_constant",
"common/fill_border.cl" },
266 {
"fill_image_borders_replicate",
"common/fill_border.cl" },
267 {
"floor_layer",
"common/floor.cl" },
268 {
"fuse_batchnormalization_layer",
"common/batchnormalization_layer.cl" },
269 {
"gather",
"common/gather.cl" },
270 {
"gemm_ma_f16",
"common/gemm.cl" },
271 {
"gemm_ma_f32",
"common/gemm.cl" },
272 {
"gemm_mv",
"common/gemv.cl" },
273 {
"gemm_mv_quantized",
"common/gemv.cl" },
274 {
"gemm_mm_interleaved_transposed_f16",
"common/gemm_v1.cl" },
275 {
"gemm_mm_interleaved_transposed_f16_acc32",
"common/gemm_v1.cl" },
276 {
"gemm_mm_interleaved_transposed_f16_bifrost",
"common/gemm_v1.cl" },
277 {
"gemm_mm_interleaved_transposed_f32",
"common/gemm_v1.cl" },
278 {
"gemm_mm_interleaved_transposed_f32_bifrost",
"common/gemm_v1.cl" },
279 {
"gemm_mm_floating_point",
"common/gemm_v1.cl" },
280 {
"gemm_mm_floating_point_f16_bifrost",
"common/gemm_v1.cl" },
281 {
"gemm_mm_floating_point_f16_bifrost_acc32",
"common/gemm_v1.cl" },
282 {
"gemm_mm_floating_point_f32_bifrost",
"common/gemm_v1.cl" },
283 {
"gemm_mm_floating_point_f32_bifrost_1000",
"common/gemm_v1.cl" },
284 {
"gemm_mm_native",
"common/gemm.cl" },
285 {
"gemm_mm_reshaped_lhs_nt_rhs_t",
"common/gemm.cl" },
286 {
"gemm_mm_reshaped_lhs_nt_rhs_t_texture",
"common/gemm.cl" },
287 {
"gemm_mm_reshaped_lhs_t_rhs_nt",
"common/gemm.cl" },
288 {
"gemm_mm_reshaped_lhs_t_rhs_nt_texture",
"common/gemm.cl" },
289 {
"gemm_mm_reshaped_only_rhs_nt",
"common/gemm.cl" },
290 {
"gemm_mm_reshaped_only_rhs_nt_texture",
"common/gemm.cl" },
291 {
"gemm_mm_reshaped_only_rhs_t",
"common/gemm.cl" },
292 {
"gemm_mm_reshaped_only_rhs_t_texture",
"common/gemm.cl" },
293 {
"gemm_lc_vm_f32",
"common/gemm.cl" },
294 {
"gemm_reshape_lhs_matrix_nt",
"common/gemm.cl" },
295 {
"gemm_reshape_lhs_matrix_t",
"common/gemm.cl" },
296 {
"gemm_reshape_rhs_matrix_nt",
"common/gemm.cl" },
297 {
"gemm_reshape_rhs_matrix_t",
"common/gemm.cl" },
298 {
"gemmlowp_matrix_a_reduction",
"common/gemmlowp.cl" },
299 {
"gemmlowp_matrix_a_reduction_dot8",
"common/gemmlowp.cl" },
300 {
"gemmlowp_matrix_b_reduction",
"common/gemmlowp.cl" },
301 {
"gemmlowp_mm_native",
"common/gemmlowp.cl" },
302 {
"gemmlowp_mm_reshaped_lhs_nt_rhs_t",
"common/gemmlowp.cl" },
303 {
"gemmlowp_mm_reshaped_only_rhs_t",
"common/gemmlowp.cl" },
304 {
"gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint",
"common/gemmlowp.cl" },
305 {
"gemmlowp_offset_contribution",
"common/gemmlowp.cl" },
306 {
"gemmlowp_offset_contribution_quantize_down",
"common/gemmlowp.cl" },
307 {
"gemmlowp_offset_contribution_quantize_down_fixedpoint",
"common/gemmlowp.cl" },
308 {
"gemmlowp_output_stage_quantize_down",
"common/gemmlowp.cl" },
309 {
"gemmlowp_output_stage_quantize_down_fixedpoint",
"common/gemmlowp.cl" },
310 {
"gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16",
"common/gemmlowp.cl" },
311 {
"gemmlowp_output_stage_quantize_down_float",
"common/gemmlowp.cl" },
312 {
"generate_proposals_compute_all_anchors",
"common/generate_proposals.cl" },
313 {
"generate_proposals_compute_all_anchors_quantized",
"common/generate_proposals_quantized.cl" },
314 {
"instance_normalization",
"common/instance_normalization.cl" },
315 {
"compute_mean_var",
"common/instance_normalization.cl" },
316 {
"l2_normalize_x",
"common/l2_normalize.cl" },
317 {
"l2_normalize_y",
"common/l2_normalize.cl" },
318 {
"l2_normalize_z",
"common/l2_normalize.cl" },
319 {
"max_unpooling_layer_2",
"common/unpooling_layer.cl" },
320 {
"mean_stddev_normalization",
"common/mean_stddev_normalization.cl" },
321 {
"memset",
"common/memset.cl" },
322 {
"minmax_layer",
"common/minmax_layer.cl" },
323 {
"non_max_suppression",
"common/nonmax.cl" },
324 {
"pad_layer_constant",
"common/pad_layer.cl" },
325 {
"pad_layer_symmetric_reflect",
"common/pad_layer.cl" },
326 {
"permute",
"common/permute.cl" },
327 {
"pixelwise_mul_complex",
"common/pixelwise_mul_float.cl" },
328 {
"pixelwise_mul_float",
"common/pixelwise_mul_float.cl" },
329 {
"pixelwise_mul_int",
"common/pixelwise_mul_int.cl" },
330 {
"pixelwise_mul_quantized",
"common/pixelwise_mul_int.cl" },
331 {
"pooling_layer_2",
"common/pooling_layer.cl" },
332 {
"pooling_layer_3",
"common/pooling_layer.cl" },
333 {
"pooling_layer_optimized_3",
"common/pooling_layer.cl" },
334 {
"pooling_layer_7",
"common/pooling_layer.cl" },
335 {
"qlstm_layer_normalization",
"common/qlstm_layer_normalization.cl" },
336 {
"quantization_layer",
"common/quantization_layer.cl" },
337 {
"range",
"common/range.cl" },
338 {
"range_quantized",
"common/range.cl" },
339 {
"reduction_operation_x",
"common/reduction_operation.cl" },
340 {
"reduction_operation_non_parallel_x",
"common/reduction_operation.cl" },
341 {
"reduction_operation_y",
"common/reduction_operation.cl" },
342 {
"reduction_operation_z",
"common/reduction_operation.cl" },
343 {
"reduction_operation_w",
"common/reduction_operation.cl" },
344 {
"reshape_layer",
"common/reshape_layer.cl" },
345 {
"reshape_to_columns",
"common/convolution_layer.cl" },
346 {
"reverse",
"common/reverse.cl" },
347 {
"roi_align_layer",
"common/roi_align_layer.cl" },
348 {
"roi_align_layer_quantized",
"common/roi_align_layer_quantized.cl" },
349 {
"roi_pooling_layer",
"common/roi_pooling_layer.cl" },
350 {
"select_same_rank",
"common/select.cl" },
351 {
"select_different_rank_2",
"common/select.cl" },
352 {
"select_different_rank_n",
"common/select.cl" },
353 {
"softmax_layer_norm",
"common/softmax_layer.cl" },
354 {
"softmax_layer_norm_quantized",
"common/softmax_layer_quantized.cl" },
355 {
"softmax_layer_max_shift_exp_sum_quantized_serial",
"common/softmax_layer_quantized.cl" },
356 {
"softmax_layer_max_shift_exp_sum_quantized_parallel",
"common/softmax_layer_quantized.cl" },
357 {
"softmax_layer_max_shift_exp_sum_serial",
"common/softmax_layer.cl" },
358 {
"softmax_layer_max_shift_exp_sum_parallel",
"common/softmax_layer.cl" },
359 {
"stack_layer",
"common/stack_layer.cl" },
360 {
"strided_slice",
"common/slice_ops.cl" },
361 {
"tile",
"common/tile.cl" },
362 {
"transpose",
"common/transpose.cl" },
363 #ifdef ENABLE_NCHW_KERNELS 364 {
"batch_to_space_nchw",
"nchw/batch_to_space.cl" },
365 {
"batch_to_space_static_nchw",
"nchw/batch_to_space.cl" },
366 {
"batchnormalization_layer_nchw",
"nchw/batchnormalization_layer.cl" },
367 {
"channel_shuffle_nchw",
"nchw/channel_shuffle.cl" },
368 {
"depth_to_space_nchw",
"nchw/depth_to_space.cl" },
369 {
"dequantization_layer_per_channel_nchw",
"nchw/dequantization_layer.cl" },
370 {
"direct_convolution1x1",
"nchw/direct_convolution1x1.cl" },
371 {
"direct_convolution1x1_f32_bifrost",
"nchw/direct_convolution1x1.cl" },
372 {
"direct_convolution3x3",
"nchw/direct_convolution3x3.cl" },
373 {
"direct_convolution3x3_f32_bifrost",
"nchw/direct_convolution3x3.cl" },
374 {
"direct_convolution5x5",
"nchw/direct_convolution5x5.cl" },
375 {
"direct_convolution5x5_f32_bifrost",
"nchw/direct_convolution5x5.cl" },
376 {
"direct_convolution_quantized",
"nchw/direct_convolution_quantized.cl" },
377 {
"im2col1x1_stridex1_nchw",
"nchw/im2col.cl" },
378 {
"im2col3x3_nchw",
"nchw/im2col.cl" },
379 {
"im2col5x5_nchw",
"nchw/im2col.cl" },
380 {
"im2col11x11_padx0_pady0_nchw",
"nchw/im2col.cl" },
381 {
"im2col_generic_nchw",
"nchw/im2col.cl" },
382 {
"im2col_generic_padx0_pady0_nchw",
"nchw/im2col.cl" },
383 {
"normalization_layer_cross_map_nchw",
"nchw/normalization_layer.cl" },
384 {
"normalization_layer_in_map_nchw",
"nchw/normalization_layer.cl" },
385 {
"normalize_planar_yuv_layer_nchw",
"nchw/normalize_planar_yuv_layer.cl" },
386 {
"normalize_planar_yuv_layer_q8_nchw",
"nchw/normalize_planar_yuv_layer_quantized.cl" },
387 {
"pooling_layer_MxN_nchw",
"nchw/pooling_layer.cl" },
388 {
"pooling_layer_2_nchw_indices_fp32",
"nchw/pooling_layer.cl" },
389 {
"pooling_layer_2_nchw_indices_fp16",
"nchw/pooling_layer.cl" },
390 {
"pooling_layer_MxN_quantized_nchw",
"nchw/pooling_layer_quantized.cl" },
391 {
"prior_box_layer_nchw",
"nchw/prior_box_layer.cl" },
392 {
"remap_nearest_neighbour_nchw",
"nchw/remap.cl" },
393 {
"remap_bilinear_nchw",
"nchw/remap.cl" },
394 {
"reorg_layer_nchw",
"nchw/reorg_layer.cl" },
395 {
"scale_nearest_neighbour_nchw",
"nchw/scale.cl" },
396 {
"scale_bilinear_nchw",
"nchw/scale.cl" },
397 {
"scale_bilinear_quantized_nchw",
"nchw/scale_quantized.cl" },
398 {
"space_to_batch_nchw",
"nchw/space_to_batch.cl" },
399 {
"space_to_batch_static_nchw",
"nchw/space_to_batch.cl" },
400 {
"space_to_depth_nchw",
"nchw/space_to_depth.cl" },
401 {
"upsample_layer_nchw",
"nchw/upsample_layer.cl" },
402 {
"winograd_filter_transform_2x2_3x3_nchw",
"nchw/winograd_filter_transform.cl" },
403 {
"winograd_filter_transform_2x1_3x1_nchw",
"nchw/winograd_filter_transform.cl" },
404 {
"winograd_filter_transform_1x2_1x3_nchw",
"nchw/winograd_filter_transform.cl" },
405 {
"winograd_filter_transform_4x4_3x3_nchw",
"nchw/winograd_filter_transform.cl" },
406 {
"winograd_filter_transform_4x1_3x1_nchw",
"nchw/winograd_filter_transform.cl" },
407 {
"winograd_filter_transform_1x4_1x3_nchw",
"nchw/winograd_filter_transform.cl" },
408 {
"winograd_filter_transform_4x4_5x5_nchw",
"nchw/winograd_filter_transform.cl" },
409 {
"winograd_filter_transform_4x1_5x1_nchw",
"nchw/winograd_filter_transform.cl" },
410 {
"winograd_filter_transform_1x4_1x5_nchw",
"nchw/winograd_filter_transform.cl" },
411 {
"winograd_input_transform_2x2_3x3_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
412 {
"winograd_input_transform_2x2_3x3_stepz2_nchw",
"nchw/winograd_input_transform.cl" },
413 {
"winograd_input_transform_2x1_3x1_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
414 {
"winograd_input_transform_2x1_3x1_stepz2_nchw",
"nchw/winograd_input_transform.cl" },
415 {
"winograd_input_transform_1x2_1x3_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
416 {
"winograd_input_transform_1x2_1x3_stepz2_nchw",
"nchw/winograd_input_transform.cl" },
417 {
"winograd_input_transform_4x4_3x3_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
418 {
"winograd_input_transform_4x1_3x1_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
419 {
"winograd_input_transform_1x4_1x3_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
420 {
"winograd_input_transform_4x4_5x5_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
421 {
"winograd_input_transform_4x1_5x1_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
422 {
"winograd_input_transform_1x4_1x5_stepz1_nchw",
"nchw/winograd_input_transform.cl" },
423 {
"winograd_output_transform_2x2_3x3_nchw",
"nchw/winograd_output_transform.cl" },
424 {
"winograd_output_transform_2x1_3x1_nchw",
"nchw/winograd_output_transform.cl" },
425 {
"winograd_output_transform_1x2_1x3_nchw",
"nchw/winograd_output_transform.cl" },
426 {
"winograd_output_transform_4x4_3x3_nchw",
"nchw/winograd_output_transform.cl" },
427 {
"winograd_output_transform_4x1_3x1_nchw",
"nchw/winograd_output_transform.cl" },
428 {
"winograd_output_transform_1x4_1x3_nchw",
"nchw/winograd_output_transform.cl" },
429 {
"winograd_output_transform_4x4_5x5_nchw",
"nchw/winograd_output_transform.cl" },
430 {
"winograd_output_transform_4x1_5x1_nchw",
"nchw/winograd_output_transform.cl" },
431 {
"winograd_output_transform_1x4_1x5_nchw",
"nchw/winograd_output_transform.cl" },
433 #ifdef ENABLE_NHWC_KERNELS 434 {
"batch_to_space_nhwc",
"nhwc/batch_to_space.cl" },
435 {
"batch_to_space_static_nhwc",
"nhwc/batch_to_space.cl" },
436 {
"batchnormalization_layer_nhwc",
"nhwc/batchnormalization_layer.cl" },
437 {
"channel_shuffle_nhwc",
"nhwc/channel_shuffle.cl" },
438 {
"depth_to_space_nhwc",
"nhwc/depth_to_space.cl" },
439 {
"dequantization_layer_per_channel_nhwc",
"nhwc/dequantization_layer.cl" },
440 {
"dwc_native_fp_nhwc",
"nhwc/dwc_native_fp_nhwc.cl" },
441 {
"dwc_native_quantized_nhwc",
"nhwc/dwc_native_quantized_nhwc.cl" },
442 {
"direct_convolution_nhwc",
"nhwc/direct_convolution.cl" },
443 {
"im2col3x3_nhwc",
"nhwc/im2col.cl" },
444 {
"im2col9x9_nhwc",
"nhwc/im2col.cl" },
445 {
"im2col_generic_nhwc",
"nhwc/im2col.cl" },
446 {
"normalization_layer_cross_map_nhwc",
"nhwc/normalization_layer.cl" },
447 {
"normalization_layer_in_map_nhwc",
"nhwc/normalization_layer.cl" },
448 {
"normalize_planar_yuv_layer_nhwc",
"nhwc/normalize_planar_yuv_layer.cl" },
449 {
"normalize_planar_yuv_layer_q8_nhwc",
"nhwc/normalize_planar_yuv_layer_quantized.cl" },
450 {
"pooling_layer_MxN_nhwc",
"nhwc/pooling_layer.cl" },
451 {
"pooling_layer_2x2_nhwc",
"nhwc/pooling_layer.cl" },
452 {
"pooling_layer_MxN_quantized_nhwc",
"nhwc/pooling_layer_quantized.cl" },
453 {
"remap_nearest_neighbour_nhwc",
"nhwc/remap.cl" },
454 {
"remap_bilinear_nhwc",
"nhwc/remap.cl" },
455 {
"reorg_layer_nhwc",
"nhwc/reorg_layer.cl" },
456 {
"scale_nearest_neighbour_nhwc",
"nhwc/scale.cl" },
457 {
"scale_bilinear_nhwc",
"nhwc/scale.cl" },
458 {
"scale_bilinear_quantized_nhwc",
"nhwc/scale_quantized.cl" },
459 {
"space_to_batch_nhwc",
"nhwc/space_to_batch.cl" },
460 {
"space_to_batch_static_nhwc",
"nhwc/space_to_batch.cl" },
461 {
"space_to_depth_nhwc",
"nhwc/space_to_depth.cl" },
462 {
"upsample_layer_nhwc",
"nhwc/upsample_layer.cl" },
463 {
"winograd_filter_transform_4x1_3x1_nhwc",
"nhwc/winograd_filter_transform.cl" },
464 {
"winograd_filter_transform_1x4_1x3_nhwc",
"nhwc/winograd_filter_transform.cl" },
465 {
"winograd_filter_transform_4x4_3x3_nhwc",
"nhwc/winograd_filter_transform.cl" },
466 {
"winograd_filter_transform_4x4_5x5_nhwc",
"nhwc/winograd_filter_transform.cl" },
467 {
"winograd_filter_transform_4x1_5x1_nhwc",
"nhwc/winograd_filter_transform.cl" },
468 {
"winograd_filter_transform_1x4_1x5_nhwc",
"nhwc/winograd_filter_transform.cl" },
469 {
"winograd_filter_transform_2x2_7x7_nhwc",
"nhwc/winograd_filter_transform.cl" },
470 {
"winograd_filter_transform_2x1_7x1_nhwc",
"nhwc/winograd_filter_transform.cl" },
471 {
"winograd_filter_transform_1x2_1x7_nhwc",
"nhwc/winograd_filter_transform.cl" },
472 {
"winograd_input_transform_4x1_3x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
473 {
"winograd_input_transform_1x4_1x3_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
474 {
"winograd_input_transform_4x4_3x3_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
475 {
"winograd_input_transform_4x4_5x5_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
476 {
"winograd_input_transform_4x1_5x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
477 {
"winograd_input_transform_1x4_1x5_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
478 {
"winograd_input_transform_2x2_7x7_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
479 {
"winograd_input_transform_2x1_7x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
480 {
"winograd_input_transform_1x2_1x7_stepz1_nhwc",
"nhwc/winograd_input_transform.cl" },
481 {
"winograd_output_transform_4x1_3x1_nhwc",
"nhwc/winograd_output_transform.cl" },
482 {
"winograd_output_transform_1x4_1x3_nhwc",
"nhwc/winograd_output_transform.cl" },
483 {
"winograd_output_transform_4x4_3x3_nhwc",
"nhwc/winograd_output_transform.cl" },
484 {
"winograd_output_transform_4x4_5x5_nhwc",
"nhwc/winograd_output_transform.cl" },
485 {
"winograd_output_transform_4x1_5x1_nhwc",
"nhwc/winograd_output_transform.cl" },
486 {
"winograd_output_transform_1x4_1x5_nhwc",
"nhwc/winograd_output_transform.cl" },
487 {
"winograd_output_transform_2x2_7x7_nhwc",
"nhwc/winograd_output_transform.cl" },
488 {
"winograd_output_transform_2x1_7x1_nhwc",
"nhwc/winograd_output_transform.cl" },
489 {
"winograd_output_transform_1x2_1x7_nhwc",
"nhwc/winograd_output_transform.cl" },
493 const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
495 #ifdef EMBEDDED_KERNELS 497 "common/activation_layer.cl",
498 #include "./cl_kernels/common/activation_layer.clembed" 501 "common/activation_layer_quant.cl",
502 #include "./cl_kernels/common/activation_layer_quant.clembed" 505 "common/arg_min_max.cl",
506 #include "./cl_kernels/common/arg_min_max.clembed" 509 "common/bitwise_op.cl",
510 #include "./cl_kernels/common/bitwise_op.clembed" 513 "common/bounding_box_transform.cl",
514 #include "./cl_kernels/common/bounding_box_transform.clembed" 517 "common/bounding_box_transform_quantized.cl",
518 #include "./cl_kernels/common/bounding_box_transform_quantized.clembed" 522 #include "./cl_kernels/common/col2im.clembed" 525 "common/comparisons.cl",
526 #include "./cl_kernels/common/comparisons.clembed" 529 "common/concatenate.cl",
530 #include "./cl_kernels/common/concatenate.clembed" 533 "common/convert_fc_weights.cl",
534 #include "./cl_kernels/common/convert_fc_weights.clembed" 537 "common/convolution_layer.cl",
538 #include "./cl_kernels/common/convolution_layer.clembed" 541 "common/copy_tensor.cl",
542 #include "./cl_kernels/common/copy_tensor.clembed" 545 "common/crop_tensor.cl",
546 #include "./cl_kernels/common/crop_tensor.clembed" 549 "common/deconvolution_layer.cl",
550 #include "./cl_kernels/common/deconvolution_layer.clembed" 554 #include "./cl_kernels/common/cast.clembed" 557 "common/dequantization_layer.cl",
558 #include "./cl_kernels/common/dequantization_layer.clembed" 561 "common/elementwise_operation.cl",
562 #include "./cl_kernels/common/elementwise_operation.clembed" 565 "common/elementwise_operation_quantized.cl",
566 #include "./cl_kernels/common/elementwise_operation_quantized.clembed" 569 "common/elementwise_unary.cl",
570 #include "./cl_kernels/common/elementwise_unary.clembed" 574 #include "./cl_kernels/common/fft.clembed" 577 "common/fft_digit_reverse.cl",
578 #include "./cl_kernels/common/fft_digit_reverse.clembed" 581 "common/fft_scale.cl",
582 #include "./cl_kernels/common/fft_scale.clembed" 585 "common/fill_border.cl",
586 #include "./cl_kernels/common/fill_border.clembed" 590 #include "./cl_kernels/common/floor.clembed" 594 #include "./cl_kernels/common/gather.clembed" 598 #include "./cl_kernels/common/gemm.clembed" 602 #include "./cl_kernels/common/gemm_v1.clembed" 605 "common/gemmlowp.cl",
606 #include "./cl_kernels/common/gemmlowp.clembed" 610 #include "./cl_kernels/common/gemv.clembed" 613 "common/generate_proposals.cl",
614 #include "./cl_kernels/common/generate_proposals.clembed" 617 "common/generate_proposals_quantized.cl",
618 #include "./cl_kernels/common/generate_proposals_quantized.clembed" 622 #include "./cl_kernels/helpers.hembed" 626 #include "./cl_kernels/helpers_asymm.hembed" 629 "common/instance_normalization.cl",
630 #include "./cl_kernels/common/instance_normalization.clembed" 633 "common/l2_normalize.cl",
634 #include "./cl_kernels/common/l2_normalize.clembed" 637 "common/mean_stddev_normalization.cl",
638 #include "./cl_kernels/common/mean_stddev_normalization.clembed" 642 #include "./cl_kernels/common/memset.clembed" 645 "common/minmax_layer.cl",
646 #include "./cl_kernels/common/minmax_layer.clembed" 650 #include "./cl_kernels/common/nonmax.clembed" 653 "common/batchnormalization_layer.cl",
654 #include "./cl_kernels/common/batchnormalization_layer.clembed" 657 "common/pad_layer.cl",
658 #include "./cl_kernels/common/pad_layer.clembed" 662 #include "./cl_kernels/common/permute.clembed" 665 "common/pixelwise_mul_float.cl",
666 #include "./cl_kernels/common/pixelwise_mul_float.clembed" 669 "common/pixelwise_mul_int.cl",
670 #include "./cl_kernels/common/pixelwise_mul_int.clembed" 673 "common/pooling_layer.cl",
674 #include "./cl_kernels/common/pooling_layer.clembed" 677 "common/qlstm_layer_normalization.cl",
678 #include "./cl_kernels/common/qlstm_layer_normalization.clembed" 681 "common/quantization_layer.cl",
682 #include "./cl_kernels/common/quantization_layer.clembed" 686 #include "./cl_kernels/common/range.clembed" 689 "common/reduction_operation.cl",
690 #include "./cl_kernels/common/reduction_operation.clembed" 693 "common/reshape_layer.cl",
694 #include "./cl_kernels/common/reshape_layer.clembed" 698 #include "./cl_kernels/common/reverse.clembed" 701 "common/roi_align_layer.cl",
702 #include "./cl_kernels/common/roi_align_layer.clembed" 705 "common/roi_align_layer_quantized.cl",
706 #include "./cl_kernels/common/roi_align_layer_quantized.clembed" 709 "common/roi_pooling_layer.cl",
710 #include "./cl_kernels/common/roi_pooling_layer.clembed" 714 #include "./cl_kernels/common/select.clembed" 717 "common/softmax_layer.cl",
718 #include "./cl_kernels/common/softmax_layer.clembed" 721 "common/softmax_layer_quantized.cl",
722 #include "./cl_kernels/common/softmax_layer_quantized.clembed" 725 "common/slice_ops.cl",
726 #include "./cl_kernels/common/slice_ops.clembed" 729 "common/stack_layer.cl",
730 #include "./cl_kernels/common/stack_layer.clembed" 734 #include "./cl_kernels/common/tile.clembed" 737 "common/transpose.cl",
738 #include "./cl_kernels/common/transpose.clembed" 742 #include "./cl_kernels/types.hembed" 745 "common/unpooling_layer.cl",
746 #include "./cl_kernels/common/unpooling_layer.clembed" 748 #ifdef ENABLE_NCHW_KERNELS 750 "nchw/batch_to_space.cl",
751 #include "./cl_kernels/nchw/batch_to_space.clembed" 754 "nchw/channel_shuffle.cl",
755 #include "./cl_kernels/nchw/channel_shuffle.clembed" 758 "nchw/upsample_layer.cl",
759 #include "./cl_kernels/nchw/upsample_layer.clembed" 762 "nchw/depth_to_space.cl",
763 #include "./cl_kernels/nchw/depth_to_space.clembed" 766 "nchw/dequantization_layer.cl",
767 #include "./cl_kernels/nchw/dequantization_layer.clembed" 770 "nchw/direct_convolution1x1.cl",
771 #include "./cl_kernels/nchw/direct_convolution1x1.clembed" 774 "nchw/direct_convolution3x3.cl",
775 #include "./cl_kernels/nchw/direct_convolution3x3.clembed" 778 "nchw/direct_convolution5x5.cl",
779 #include "./cl_kernels/nchw/direct_convolution5x5.clembed" 782 "nchw/direct_convolution_quantized.cl",
783 #include "./cl_kernels/nchw/direct_convolution_quantized.clembed" 787 #include "./cl_kernels/nchw/im2col.clembed" 790 "nchw/normalization_layer.cl",
791 #include "./cl_kernels/nchw/normalization_layer.clembed" 794 "nchw/normalize_planar_yuv_layer.cl",
795 #include "./cl_kernels/nchw/normalize_planar_yuv_layer.clembed" 798 "nchw/normalize_planar_yuv_layer_quantized.cl",
799 #include "./cl_kernels/nchw/normalize_planar_yuv_layer_quantized.clembed" 802 "nchw/batchnormalization_layer.cl",
803 #include "./cl_kernels/nchw/batchnormalization_layer.clembed" 806 "nchw/pooling_layer.cl",
807 #include "./cl_kernels/nchw/pooling_layer.clembed" 810 "nchw/pooling_layer_quantized.cl",
811 #include "./cl_kernels/nchw/pooling_layer_quantized.clembed" 814 "nchw/prior_box_layer.cl",
815 #include "./cl_kernels/nchw/prior_box_layer.clembed" 819 #include "./cl_kernels/nchw/remap.clembed" 822 "nchw/reorg_layer.cl",
823 #include "./cl_kernels/nchw/reorg_layer.clembed" 827 #include "./cl_kernels/nchw/scale.clembed" 830 "nchw/scale_quantized.cl",
831 #include "./cl_kernels/nchw/scale_quantized.clembed" 834 "nchw/space_to_batch.cl",
835 #include "./cl_kernels/nchw/space_to_batch.clembed" 838 "nchw/space_to_depth.cl",
839 #include "./cl_kernels/nchw/space_to_depth.clembed" 842 "nchw/winograd_filter_transform.cl",
843 #include "./cl_kernels/nchw/winograd_filter_transform.clembed" 846 "nchw/winograd_input_transform.cl",
847 #include "./cl_kernels/nchw/winograd_input_transform.clembed" 850 "nchw/winograd_output_transform.cl",
851 #include "./cl_kernels/nchw/winograd_output_transform.clembed" 855 #ifdef ENABLE_NHWC_KERNELS 857 "nhwc/batch_to_space.cl",
858 #include "./cl_kernels/nhwc/batch_to_space.clembed" 861 "nhwc/channel_shuffle.cl",
862 #include "./cl_kernels/nhwc/channel_shuffle.clembed" 865 "nhwc/upsample_layer.cl",
866 #include "./cl_kernels/nhwc/upsample_layer.clembed" 869 "nhwc/depth_to_space.cl",
870 #include "./cl_kernels/nhwc/depth_to_space.clembed" 873 "nhwc/dequantization_layer.cl",
874 #include "./cl_kernels/nhwc/dequantization_layer.clembed" 877 "nhwc/direct_convolution.cl",
878 #include "./cl_kernels/nhwc/direct_convolution.clembed" 881 "nhwc/dwc_native_fp_nhwc.cl",
882 #include "./cl_kernels/nhwc/dwc_native_fp_nhwc.clembed" 885 "nhwc/dwc_native_quantized_nhwc.cl",
886 #include "./cl_kernels/nhwc/dwc_native_quantized_nhwc.clembed" 889 "nhwc/normalization_layer.cl",
890 #include "./cl_kernels/nhwc/normalization_layer.clembed" 893 "nhwc/normalize_planar_yuv_layer.cl",
894 #include "./cl_kernels/nhwc/normalize_planar_yuv_layer.clembed" 897 "nhwc/normalize_planar_yuv_layer_quantized.cl",
898 #include "./cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.clembed" 902 #include "./cl_kernels/nhwc/im2col.clembed" 905 "nhwc/batchnormalization_layer.cl",
906 #include "./cl_kernels/nhwc/batchnormalization_layer.clembed" 909 "nhwc/pooling_layer.cl",
910 #include "./cl_kernels/nhwc/pooling_layer.clembed" 913 "nhwc/pooling_layer_quantized.cl",
914 #include "./cl_kernels/nhwc/pooling_layer_quantized.clembed" 918 #include "./cl_kernels/nhwc/remap.clembed" 921 "nhwc/reorg_layer.cl",
922 #include "./cl_kernels/nhwc/reorg_layer.clembed" 926 #include "./cl_kernels/nhwc/scale.clembed" 929 "nhwc/scale_quantized.cl",
930 #include "./cl_kernels/nhwc/scale_quantized.clembed" 933 "nhwc/space_to_batch.cl",
934 #include "./cl_kernels/nhwc/space_to_batch.clembed" 937 "nhwc/space_to_depth.cl",
938 #include "./cl_kernels/nhwc/space_to_depth.clembed" 941 "nhwc/winograd_filter_transform.cl",
942 #include "./cl_kernels/nhwc/winograd_filter_transform.clembed" 945 "nhwc/winograd_input_transform.cl",
946 #include "./cl_kernels/nhwc/winograd_input_transform.clembed" 949 "nhwc/winograd_output_transform.cl",
950 #include "./cl_kernels/nhwc/winograd_output_transform.clembed" 959 return _kernel_library;
965 auto kernel_program_it = _kernel_program_map.find(kernel_name);
967 if(_kernel_program_map.end() == kernel_program_it)
972 const std::string
program_name = kernel_program_it->second;
979 _kernel_path = std::move(kernel_path);
990 #ifdef EMBEDDED_KERNELS 991 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 992 const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
993 if(inflatted_program_source_it != _decompressed_source_map.end())
995 return ClProgramInfo{ inflatted_program_source_it->second,
false };
999 const auto program_source_it = _program_source_map.find(program_name);
1000 if(program_source_it == _program_source_map.end())
1004 std::string program_source = program_source_it->second;
1006 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS 1007 std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
1009 _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
1010 program_source = std::move(decompressed_program_source);
1017 std::string binary_name = source_name +
"bin";
1018 std::string program_source{};
1019 bool is_binary =
false;
1021 if(std::ifstream(binary_name).is_open())
1023 program_source =
read_file(binary_name,
true);
1026 else if(std::ifstream(source_name).is_open())
1028 program_source =
read_file(source_name,
false);
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
#define ARM_COMPUTE_ERROR_VAR(msg,...)
Print the given message then throw an std::runtime_error.
ClProgramInfo program(const std::string &program_name) const
Gets the source of the selected program.
std::string program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
Copyright (c) 2017-2021 Arm Limited.
const std::string & kernel_path() const
Gets the path that the kernels reside in.
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
std::string read_file(const std::string &filename, bool binary)
Load an entire file in memory.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Structure to encapsulate program-related information.
void end(TokenStream &in, bool &valid)
void set_kernel_path(std::string kernel_path)
Sets the path that the kernels reside in.