34 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
/** Reverse lookup table for the standard base64 alphabet.
 *
 * Indexed by the unsigned value of an encoded character and yields its 6-bit value:
 * 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63.
 * Every other character (including the padding character '=') maps to 0.
 */
constexpr std::array<uint8_t, 256> b64_invtab = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /*   0 -  15 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /*  16 -  31 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  62, 0,  0,  0,  63, /*  32 -  47 */
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0,  0,  0,  0,  0,  0,  /*  48 -  63 */
    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, /*  64 -  79 */
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0,  0,  0,  0,  0,  /*  80 -  95 */
    0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /*  96 - 111 */
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0,  0,  0,  0,  0,  /* 112 - 127 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 128 - 143 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 144 - 159 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 160 - 175 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 176 - 191 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 192 - 207 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 208 - 223 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 224 - 239 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 240 - 255 */
};

/** Decode a base64 encoded string.
 *
 * Characters outside the base64 alphabet are not rejected; they decode as the value 0.
 *
 * @param[in] str Base64 encoded string to decode
 *
 * @return The decoded bytes. An empty string is returned when @p str is empty or
 *         when its length is not a multiple of 4.
 */
std::string decode_base64(const std::string &str)
{
    constexpr const char pad_char = '=';

    // A valid base64 string is made of complete 4-character groups.
    // This guard also protects the two trailing-character reads below.
    const std::size_t str_len = str.size();
    if (str_len == 0 || (str_len % 4) != 0)
    {
        return {};
    }

    // Number of padding characters found in the last two positions (0, 1 or 2)
    const std::size_t padding =
        (str[str_len - 1] == pad_char ? 1U : 0U) + (str[str_len - 2] == pad_char ? 1U : 0U);

    // Fetch the 6-bit value of the character at position pos.
    // The cast to unsigned char keeps the table index in [0, 255] even where plain char is signed.
    const auto sextet = [&str](std::size_t pos) -> unsigned int
    { return b64_invtab[static_cast<unsigned char>(str[pos])]; };

    // Every 4 encoded characters (4 x 6 bits) produce at most 3 decoded bytes
    std::string dec_b64;
    dec_b64.reserve((str_len / 4) * 3);

    // Decode all groups that carry no padding; a padded final group is handled separately
    const std::size_t full_len = (padding == 0) ? str_len : str_len - 4;
    std::size_t       c        = 0;
    for (; c < full_len; c += 4)
    {
        const unsigned int byte0 = sextet(c);
        const unsigned int byte1 = sextet(c + 1);
        const unsigned int byte2 = sextet(c + 2);
        const unsigned int byte3 = sextet(c + 3);

        dec_b64.push_back(static_cast<char>(((byte0 << 2) | (byte1 >> 4)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((byte1 << 4) | (byte2 >> 2)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((byte2 << 6) | byte3) & 0xFFU));
    }

    // Final group: one padding character leaves 2 bytes, two padding characters leave 1 byte
    if (padding == 1)
    {
        const unsigned int byte0 = sextet(c);
        const unsigned int byte1 = sextet(c + 1);
        const unsigned int byte2 = sextet(c + 2);

        dec_b64.push_back(static_cast<char>(((byte0 << 2) | (byte1 >> 4)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((byte1 << 4) | (byte2 >> 2)) & 0xFFU));
    }
    else if (padding == 2)
    {
        const unsigned int byte0 = sextet(c);
        const unsigned int byte1 = sextet(c + 1);

        dec_b64.push_back(static_cast<char>(((byte0 << 2) | (byte1 >> 4)) & 0xFFU));
    }

    return dec_b64;
}
126 std::string decompress_zlib(
const std::string &
str)
130 if (inflateInit(&ds) != Z_OK)
132 return std::string();
134 ds.avail_in =
str.size();
135 ds.next_in = (Bytef *)
str.data();
139 char roll_buff[16384];
140 std::string inflated_str;
143 ds.avail_out =
sizeof(roll_buff);
144 ds.next_out =
reinterpret_cast<Bytef *
>(roll_buff);
146 status = inflate(&ds, 0);
147 if (inflated_str.size() < ds.total_out)
149 inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
151 }
while (status == Z_OK);
155 if (status != Z_STREAM_END)
157 return std::string();
// Static lookup table of ClKernelLibrary: each entry pairs a kernel name (key) with the
// path of the .cl program file listed for it (value). Several kernel names may share one program file.
// The entries that follow `#ifdef ENABLE_NCHW_KERNELS` and `#ifdef ENABLE_NHWC_KERNELS` are the
// layout-specific ones ("nchw/..." and "nhwc/..." paths respectively).
169 const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map = {
171 {
"activation_layer",
"common/activation_layer.cl"},
172 {
"activation_layer_quant",
"common/activation_layer_quant.cl"},
173 {
"activation_layer_quant_f32",
"common/activation_layer_quant.cl"},
174 {
"arg_min_max_x",
"common/arg_min_max.cl"},
175 {
"arg_min_max_y",
"common/arg_min_max.cl"},
176 {
"arg_min_max_z",
"common/arg_min_max.cl"},
177 {
"arg_min_max_w",
"common/arg_min_max.cl"},
178 {
"bitwise_or",
"common/bitwise_op.cl"},
179 {
"bitwise_and",
"common/bitwise_op.cl"},
180 {
"bitwise_xor",
"common/bitwise_op.cl"},
181 {
"bitwise_not",
"common/bitwise_op.cl"},
182 {
"bounding_box_transform",
"common/bounding_box_transform.cl"},
183 {
"bounding_box_transform_quantized",
"common/bounding_box_transform_quantized.cl"},
184 {
"compare_equal",
"common/comparisons.cl"},
185 {
"compare_equal_quantized",
"common/comparisons.cl"},
186 {
"compare_notequal",
"common/comparisons.cl"},
187 {
"compare_notequal_quantized",
"common/comparisons.cl"},
188 {
"compare_greater",
"common/comparisons.cl"},
189 {
"compare_greater_quantized",
"common/comparisons.cl"},
190 {
"compare_greaterequal",
"common/comparisons.cl"},
191 {
"compare_greaterequal_quantized",
"common/comparisons.cl"},
192 {
"compare_less",
"common/comparisons.cl"},
193 {
"compare_less_quantized",
"common/comparisons.cl"},
194 {
"compare_lessequal",
"common/comparisons.cl"},
195 {
"compare_lessequal_quantized",
"common/comparisons.cl"},
196 {
"concatenate",
"common/concatenate.cl"},
197 {
"concatenate_width",
"common/concatenate.cl"},
198 {
"concatenate_height",
"common/concatenate.cl"},
199 {
"concatenate_width_x2",
"common/concatenate.cl"},
200 {
"concatenate_width_x4",
"common/concatenate.cl"},
201 {
"col2im",
"common/col2im.cl"},
202 {
"cast_down",
"common/cast.cl"},
203 {
"cast_up",
"common/cast.cl"},
204 {
"convert_fc_weights",
"common/convert_fc_weights.cl"},
205 {
"copy_tensor",
"common/copy_tensor.cl"},
206 {
"crop_tensor",
"common/crop_tensor.cl"},
207 {
"deconvolution_reshape",
"common/deconvolution_layer.cl"},
208 {
"deconvolution_upsample",
"common/deconvolution_layer.cl"},
209 {
"dequantization_layer",
"common/dequantization_layer.cl"},
210 {
"elementwise_operation_ADD",
"common/elementwise_operation.cl"},
211 {
"elementwise_operation_SUB",
"common/elementwise_operation.cl"},
212 {
"elementwise_operation_MAX",
"common/elementwise_operation.cl"},
213 {
"elementwise_operation_MIN",
"common/elementwise_operation.cl"},
214 {
"elementwise_operation_DIV",
"common/elementwise_operation.cl"},
215 {
"elementwise_operation_SQUARED_DIFF",
"common/elementwise_operation.cl"},
216 {
"elementwise_operation_POWER",
"common/elementwise_operation.cl"},
217 {
"elementwise_operation_PRELU",
"common/elementwise_operation.cl"},
218 {
"elementwise_operation_AND",
"common/elementwise_operation.cl"},
219 {
"elementwise_operation_OR",
"common/elementwise_operation.cl"},
220 {
"elementwise_operation_ADD_quantized",
"common/elementwise_operation_quantized.cl"},
221 {
"elementwise_operation_SUB_quantized",
"common/elementwise_operation_quantized.cl"},
222 {
"elementwise_operation_MAX_quantized",
"common/elementwise_operation_quantized.cl"},
223 {
"elementwise_operation_MIN_quantized",
"common/elementwise_operation_quantized.cl"},
224 {
"elementwise_operation_DIV_quantized",
"common/elementwise_operation_quantized.cl"},
225 {
"elementwise_operation_SQUARED_DIFF_quantized",
"common/elementwise_operation_quantized.cl"},
226 {
"elementwise_operation_PRELU_quantized",
"common/elementwise_operation_quantized.cl"},
227 {
"elementwise_unary",
"common/elementwise_unary.cl"},
228 {
"elementwise_unary_quantized",
"common/elementwise_unary_quantized.cl"},
229 {
"fft_digit_reverse_axis_0",
"common/fft_digit_reverse.cl"},
230 {
"fft_digit_reverse_axis_1",
"common/fft_digit_reverse.cl"},
231 {
"fft_radix_2_first_stage_axis_0",
"common/fft.cl"},
232 {
"fft_radix_2_first_stage_axis_1",
"common/fft.cl"},
233 {
"fft_radix_2_axis_0",
"common/fft.cl"},
234 {
"fft_radix_2_axis_1",
"common/fft.cl"},
235 {
"fft_radix_3_first_stage_axis_0",
"common/fft.cl"},
236 {
"fft_radix_3_first_stage_axis_1",
"common/fft.cl"},
237 {
"fft_radix_3_axis_0",
"common/fft.cl"},
238 {
"fft_radix_3_axis_1",
"common/fft.cl"},
239 {
"fft_radix_4_first_stage_axis_0",
"common/fft.cl"},
240 {
"fft_radix_4_first_stage_axis_1",
"common/fft.cl"},
241 {
"fft_radix_4_axis_0",
"common/fft.cl"},
242 {
"fft_radix_4_axis_1",
"common/fft.cl"},
243 {
"fft_radix_5_first_stage_axis_0",
"common/fft.cl"},
244 {
"fft_radix_5_first_stage_axis_1",
"common/fft.cl"},
245 {
"fft_radix_5_axis_0",
"common/fft.cl"},
246 {
"fft_radix_5_axis_1",
"common/fft.cl"},
247 {
"fft_radix_7_first_stage_axis_0",
"common/fft.cl"},
248 {
"fft_radix_7_first_stage_axis_1",
"common/fft.cl"},
249 {
"fft_radix_7_axis_0",
"common/fft.cl"},
250 {
"fft_radix_7_axis_1",
"common/fft.cl"},
251 {
"fft_radix_8_first_stage_axis_0",
"common/fft.cl"},
252 {
"fft_radix_8_first_stage_axis_1",
"common/fft.cl"},
253 {
"fft_radix_8_axis_0",
"common/fft.cl"},
254 {
"fft_radix_8_axis_1",
"common/fft.cl"},
255 {
"fft_scale_conj",
"common/fft_scale.cl"},
256 {
"fill_image_borders_constant",
"common/fill_border.cl"},
257 {
"fill_image_borders_replicate",
"common/fill_border.cl"},
258 {
"floor_layer",
"common/floor.cl"},
259 {
"fuse_batchnormalization_layer",
"common/batchnormalization_layer.cl"},
260 {
"gather",
"common/gather.cl"},
261 {
"gemm_ma_f16",
"common/gemm.cl"},
262 {
"gemm_ma_f32",
"common/gemm.cl"},
263 {
"gemm_mv",
"common/gemv.cl"},
264 {
"gemm_mv_quantized",
"common/gemv.cl"},
265 {
"gemm_mm_native",
"common/gemm.cl"},
266 {
"gemm_mm_reshaped_only_rhs_nt_mmul",
"common/gemm_reshaped_only_rhs_mmul.cl"},
267 {
"gemm_mm_reshaped_only_rhs_nt_mmul_texture",
"common/gemm_reshaped_only_rhs_mmul.cl"},
268 {
"gemm_mm_reshaped_lhs_nt_rhs_t",
"common/gemm.cl"},
269 {
"gemm_mm_reshaped_lhs_nt_rhs_t_texture",
"common/gemm.cl"},
270 {
"gemm_mm_reshaped_lhs_t_rhs_nt",
"common/gemm.cl"},
271 {
"gemm_mm_reshaped_lhs_t_rhs_nt_texture",
"common/gemm.cl"},
272 {
"gemm_mm_reshaped_only_rhs_nt",
"common/gemm.cl"},
273 {
"gemm_mm_reshaped_only_rhs_nt_texture",
"common/gemm.cl"},
274 {
"gemm_mm_reshaped_only_rhs_t",
"common/gemm.cl"},
275 {
"gemm_mm_reshaped_only_rhs_t_texture",
"common/gemm.cl"},
276 {
"gemm_lc_vm_f32",
"common/gemm.cl"},
277 {
"gemm_reshape_lhs_matrix_nt",
"common/gemm_utils.cl"},
278 {
"gemm_reshape_lhs_matrix_t",
"common/gemm_utils.cl"},
279 {
"gemm_reshape_rhs_matrix_nt",
"common/gemm_utils.cl"},
280 {
"gemm_reshape_rhs_matrix_t",
"common/gemm_utils.cl"},
281 {
"gemmlowp_matrix_a_reduction",
"common/gemmlowp.cl"},
282 {
"gemmlowp_matrix_a_reduction_dot8",
"common/gemmlowp.cl"},
283 {
"gemmlowp_matrix_b_reduction",
"common/gemmlowp.cl"},
284 {
"gemmlowp_mm_native",
"common/gemmlowp.cl"},
285 {
"gemmlowp_mm_reshaped_lhs_nt_rhs_t",
"common/gemmlowp.cl"},
286 {
"gemmlowp_mm_reshaped_only_rhs_t",
"common/gemmlowp.cl"},
287 {
"gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint",
"common/gemmlowp.cl"},
288 {
"gemmlowp_mm_reshaped_only_rhs_mmul",
"common/gemmlowp_reshaped_only_rhs_mmul.cl"},
289 {
"gemmlowp_offset_contribution",
"common/gemmlowp.cl"},
290 {
"gemmlowp_offset_contribution_quantize_down",
"common/gemmlowp.cl"},
291 {
"gemmlowp_offset_contribution_quantize_down_fixedpoint",
"common/gemmlowp.cl"},
292 {
"gemmlowp_output_stage_quantize_down",
"common/gemmlowp.cl"},
293 {
"gemmlowp_output_stage_quantize_down_fixedpoint",
"common/gemmlowp.cl"},
294 {
"gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16",
"common/gemmlowp.cl"},
295 {
"gemmlowp_output_stage_quantize_down_float",
"common/gemmlowp.cl"},
296 {
"generate_proposals_compute_all_anchors",
"common/generate_proposals.cl"},
297 {
"generate_proposals_compute_all_anchors_quantized",
"common/generate_proposals_quantized.cl"},
298 {
"instance_normalization",
"common/instance_normalization.cl"},
299 {
"compute_mean_var",
"common/instance_normalization.cl"},
300 {
"l2_normalize_x",
"common/l2_normalize.cl"},
301 {
"l2_normalize_y",
"common/l2_normalize.cl"},
302 {
"l2_normalize_z",
"common/l2_normalize.cl"},
303 {
"mat_mul_native_mmul_nt_nt",
"common/mat_mul_mmul.cl"},
304 {
"mat_mul_native_mmul_t_nt",
"common/mat_mul_mmul.cl"},
305 {
"mat_mul_native_mmul_nt_t",
"common/mat_mul_mmul.cl"},
306 {
"mat_mul_native_mmul_t_t",
"common/mat_mul_mmul.cl"},
307 {
"mat_mul_native_nt_nt",
"common/mat_mul.cl"},
308 {
"mat_mul_native_nt_t",
"common/mat_mul.cl"},
309 {
"mat_mul_native_t_nt",
"common/mat_mul.cl"},
310 {
"mat_mul_native_t_t",
"common/mat_mul.cl"},
311 {
"mat_mul_native_quantized_nt_nt",
"common/mat_mul_quantized.cl"},
312 {
"mat_mul_native_quantized_nt_t",
"common/mat_mul_quantized.cl"},
313 {
"mat_mul_native_quantized_t_nt",
"common/mat_mul_quantized.cl"},
314 {
"mat_mul_native_quantized_t_t",
"common/mat_mul_quantized.cl"},
315 {
"mat_mul_native_quantized_mmul_nt_nt",
"common/mat_mul_quantized_mmul.cl"},
316 {
"mat_mul_native_quantized_mmul_nt_t",
"common/mat_mul_quantized_mmul.cl"},
317 {
"mat_mul_native_quantized_mmul_t_nt",
"common/mat_mul_quantized_mmul.cl"},
318 {
"mat_mul_native_quantized_mmul_t_t",
"common/mat_mul_quantized_mmul.cl"},
319 {
"max_unpooling_layer_2",
"common/unpooling_layer.cl"},
320 {
"mean_stddev_normalization",
"common/mean_stddev_normalization.cl"},
321 {
"memset",
"common/memset.cl"},
322 {
"minmax_layer",
"common/minmax_layer.cl"},
323 {
"non_max_suppression",
"common/nonmax.cl"},
324 {
"pad_layer_constant",
"common/pad_layer.cl"},
325 {
"pad_layer_symmetric_reflect",
"common/pad_layer.cl"},
326 {
"permute",
"common/permute.cl"},
327 {
"pixelwise_mul_complex",
"common/pixelwise_mul_float.cl"},
328 {
"pixelwise_mul_float",
"common/pixelwise_mul_float.cl"},
329 {
"pixelwise_mul_int",
"common/pixelwise_mul_int.cl"},
330 {
"pixelwise_mul_quantized",
"common/pixelwise_mul_int.cl"},
331 {
"qlstm_layer_normalization",
"common/qlstm_layer_normalization.cl"},
332 {
"quantization_layer",
"common/quantization_layer.cl"},
333 {
"range",
"common/range.cl"},
334 {
"range_quantized",
"common/range.cl"},
335 {
"reduction_operation_x",
"common/reduction_operation.cl"},
336 {
"reduction_operation_non_parallel_x",
"common/reduction_operation.cl"},
337 {
"reduction_operation_y",
"common/reduction_operation.cl"},
338 {
"reduction_operation_z",
"common/reduction_operation.cl"},
339 {
"reduction_operation_w",
"common/reduction_operation.cl"},
340 {
"reshape_layer",
"common/reshape_layer.cl"},
341 {
"reshape_to_columns",
"common/convolution_layer.cl"},
342 {
"reverse",
"common/reverse.cl"},
343 {
"roi_align_layer",
"common/roi_align_layer.cl"},
344 {
"roi_align_layer_quantized",
"common/roi_align_layer_quantized.cl"},
345 {
"roi_pooling_layer",
"common/roi_pooling_layer.cl"},
346 {
"select_same_rank",
"common/select.cl"},
347 {
"select_different_rank_2",
"common/select.cl"},
348 {
"select_different_rank_n",
"common/select.cl"},
349 {
"softmax_x",
"common/softmax_layer.cl"},
350 {
"softmax_non_x",
"common/softmax_layer.cl"},
351 {
"stack_layer",
"common/stack_layer.cl"},
352 {
"strided_slice",
"common/slice_ops.cl"},
353 {
"tile",
"common/tile.cl"},
354 {
"transpose",
"common/transpose.cl"},
355 #ifdef ENABLE_NCHW_KERNELS
356 {
"batch_to_space_nchw",
"nchw/batch_to_space.cl"},
357 {
"batch_to_space_static_nchw",
"nchw/batch_to_space.cl"},
358 {
"batchnormalization_layer_nchw",
"nchw/batchnormalization_layer.cl"},
359 {
"channel_shuffle_nchw",
"nchw/channel_shuffle.cl"},
360 {
"depth_to_space_nchw",
"nchw/depth_to_space.cl"},
361 {
"dequantization_layer_per_channel_nchw",
"nchw/dequantization_layer.cl"},
362 {
"direct_convolution1x1",
"nchw/direct_convolution1x1.cl"},
363 {
"direct_convolution_nchw",
"nchw/direct_convolution.cl"},
365 {
"im2col1x1_stridex1_nchw",
"nchw/im2col.cl"},
366 {
"im2col3x3_nchw",
"nchw/im2col.cl"},
367 {
"im2col5x5_nchw",
"nchw/im2col.cl"},
368 {
"im2col11x11_padx0_pady0_nchw",
"nchw/im2col.cl"},
369 {
"im2col_generic_nchw",
"nchw/im2col.cl"},
370 {
"im2col_generic_padx0_pady0_nchw",
"nchw/im2col.cl"},
371 {
"normalization_layer_cross_map_nchw",
"nchw/normalization_layer.cl"},
372 {
"normalization_layer_in_map_nchw",
"nchw/normalization_layer.cl"},
373 {
"normalize_planar_yuv_layer_nchw",
"nchw/normalize_planar_yuv_layer.cl"},
374 {
"normalize_planar_yuv_layer_q8_nchw",
"nchw/normalize_planar_yuv_layer_quantized.cl"},
375 {
"pooling_layer_MxN_nchw",
"nchw/pooling_layer.cl"},
376 {
"pooling_layer_2_nchw_indices",
"nchw/pooling_layer.cl"},
377 {
"prior_box_layer_nchw",
"nchw/prior_box_layer.cl"},
378 {
"reorg_layer_nchw",
"nchw/reorg_layer.cl"},
379 {
"scale_nearest_neighbour_nchw",
"nchw/scale.cl"},
380 {
"scale_bilinear_nchw",
"nchw/scale.cl"},
381 {
"space_to_batch_nchw",
"nchw/space_to_batch.cl"},
382 {
"space_to_batch_static_nchw",
"nchw/space_to_batch.cl"},
383 {
"space_to_depth_nchw",
"nchw/space_to_depth.cl"},
384 {
"upsample_layer_nchw",
"nchw/upsample_layer.cl"},
385 {
"winograd_filter_transform_2x2_3x3_nchw",
"nchw/winograd_filter_transform.cl"},
386 {
"winograd_filter_transform_2x1_3x1_nchw",
"nchw/winograd_filter_transform.cl"},
387 {
"winograd_filter_transform_1x2_1x3_nchw",
"nchw/winograd_filter_transform.cl"},
388 {
"winograd_filter_transform_4x4_3x3_nchw",
"nchw/winograd_filter_transform.cl"},
389 {
"winograd_filter_transform_4x1_3x1_nchw",
"nchw/winograd_filter_transform.cl"},
390 {
"winograd_filter_transform_1x4_1x3_nchw",
"nchw/winograd_filter_transform.cl"},
391 {
"winograd_filter_transform_4x4_5x5_nchw",
"nchw/winograd_filter_transform.cl"},
392 {
"winograd_filter_transform_4x1_5x1_nchw",
"nchw/winograd_filter_transform.cl"},
393 {
"winograd_filter_transform_1x4_1x5_nchw",
"nchw/winograd_filter_transform.cl"},
394 {
"winograd_input_transform_2x2_3x3_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
395 {
"winograd_input_transform_2x2_3x3_stepz2_nchw",
"nchw/winograd_input_transform.cl"},
396 {
"winograd_input_transform_2x1_3x1_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
397 {
"winograd_input_transform_2x1_3x1_stepz2_nchw",
"nchw/winograd_input_transform.cl"},
398 {
"winograd_input_transform_1x2_1x3_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
399 {
"winograd_input_transform_1x2_1x3_stepz2_nchw",
"nchw/winograd_input_transform.cl"},
400 {
"winograd_input_transform_4x4_3x3_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
401 {
"winograd_input_transform_4x1_3x1_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
402 {
"winograd_input_transform_1x4_1x3_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
403 {
"winograd_input_transform_4x4_5x5_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
404 {
"winograd_input_transform_4x1_5x1_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
405 {
"winograd_input_transform_1x4_1x5_stepz1_nchw",
"nchw/winograd_input_transform.cl"},
406 {
"winograd_output_transform_2x2_3x3_nchw",
"nchw/winograd_output_transform.cl"},
407 {
"winograd_output_transform_2x1_3x1_nchw",
"nchw/winograd_output_transform.cl"},
408 {
"winograd_output_transform_1x2_1x3_nchw",
"nchw/winograd_output_transform.cl"},
409 {
"winograd_output_transform_4x4_3x3_nchw",
"nchw/winograd_output_transform.cl"},
410 {
"winograd_output_transform_4x1_3x1_nchw",
"nchw/winograd_output_transform.cl"},
411 {
"winograd_output_transform_1x4_1x3_nchw",
"nchw/winograd_output_transform.cl"},
412 {
"winograd_output_transform_4x4_5x5_nchw",
"nchw/winograd_output_transform.cl"},
413 {
"winograd_output_transform_4x1_5x1_nchw",
"nchw/winograd_output_transform.cl"},
414 {
"winograd_output_transform_1x4_1x5_nchw",
"nchw/winograd_output_transform.cl"},
416 #ifdef ENABLE_NHWC_KERNELS
417 {
"batch_to_space_nhwc",
"nhwc/batch_to_space.cl"},
418 {
"batch_to_space_static_nhwc",
"nhwc/batch_to_space.cl"},
419 {
"batchnormalization_layer_nhwc",
"nhwc/batchnormalization_layer.cl"},
420 {
"channel_shuffle_nhwc",
"nhwc/channel_shuffle.cl"},
421 {
"depth_to_space_nhwc",
"nhwc/depth_to_space.cl"},
422 {
"dequantization_layer_per_channel_nhwc",
"nhwc/dequantization_layer.cl"},
423 {
"dwc_native_fp_nhwc",
"nhwc/dwc_native_fp_nhwc.cl"},
424 {
"dwc_native_quantized_nhwc",
"nhwc/dwc_native_quantized_nhwc.cl"},
425 {
"direct_convolution_nhwc",
"nhwc/direct_convolution.cl"},
426 {
"direct_convolution3d_ndhwc",
"nhwc/direct_convolution3d.cl"},
427 {
"im2col3x3_nhwc",
"nhwc/im2col.cl"},
428 {
"im2col9x9_nhwc",
"nhwc/im2col.cl"},
429 {
"im2col_generic_nhwc",
"nhwc/im2col.cl"},
430 {
"indirect_convolution_nhwc",
"nhwc/indirect_convolution.cl"},
431 {
"indirect_convolution_address_precalculation",
"nhwc/indirect_convolution.cl"},
432 {
"normalization_layer_cross_map_nhwc",
"nhwc/normalization_layer.cl"},
433 {
"normalization_layer_in_map_nhwc",
"nhwc/normalization_layer.cl"},
434 {
"normalize_planar_yuv_layer_nhwc",
"nhwc/normalize_planar_yuv_layer.cl"},
435 {
"normalize_planar_yuv_layer_q8_nhwc",
"nhwc/normalize_planar_yuv_layer_quantized.cl"},
436 {
"pooling_layer_MxN_nhwc",
"nhwc/pooling_layer.cl"},
437 {
"pooling_layer_2x2_nhwc",
"nhwc/pooling_layer.cl"},
438 {
"pooling_layer_MxN_quantized_nhwc",
"nhwc/pooling_layer_quantized.cl"},
439 {
"pooling_3d_layer_MxN_ndhwc",
"nhwc/pooling_3d_layer.cl"},
440 {
"pooling_3d_layer_MxN_ndhwc_quantized",
"nhwc/pooling_3d_layer_quantized.cl"},
441 {
"reorg_layer_nhwc",
"nhwc/reorg_layer.cl"},
442 {
"scale_nearest_neighbour_nhwc",
"nhwc/scale.cl"},
443 {
"scale_bilinear_nhwc",
"nhwc/scale.cl"},
444 {
"space_to_batch_nhwc",
"nhwc/space_to_batch.cl"},
445 {
"space_to_batch_static_nhwc",
"nhwc/space_to_batch.cl"},
446 {
"space_to_depth_nhwc",
"nhwc/space_to_depth.cl"},
447 {
"transposed_convolution_nhwc",
"nhwc/transposed_convolution.cl"},
448 {
"upsample_layer_nhwc",
"nhwc/upsample_layer.cl"},
449 {
"winograd_filter_transform_4x1_3x1_nhwc",
"nhwc/winograd_filter_transform.cl"},
450 {
"winograd_filter_transform_1x4_1x3_nhwc",
"nhwc/winograd_filter_transform.cl"},
451 {
"winograd_filter_transform_4x4_3x3_nhwc",
"nhwc/winograd_filter_transform.cl"},
452 {
"winograd_filter_transform_4x4_5x5_nhwc",
"nhwc/winograd_filter_transform.cl"},
453 {
"winograd_filter_transform_4x1_5x1_nhwc",
"nhwc/winograd_filter_transform.cl"},
454 {
"winograd_filter_transform_1x4_1x5_nhwc",
"nhwc/winograd_filter_transform.cl"},
455 {
"winograd_filter_transform_2x2_7x7_nhwc",
"nhwc/winograd_filter_transform.cl"},
456 {
"winograd_filter_transform_2x1_7x1_nhwc",
"nhwc/winograd_filter_transform.cl"},
457 {
"winograd_filter_transform_1x2_1x7_nhwc",
"nhwc/winograd_filter_transform.cl"},
458 {
"winograd_input_transform_4x1_3x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
459 {
"winograd_input_transform_1x4_1x3_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
460 {
"winograd_input_transform_4x4_3x3_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
461 {
"winograd_input_transform_4x4_5x5_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
462 {
"winograd_input_transform_4x1_5x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
463 {
"winograd_input_transform_1x4_1x5_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
464 {
"winograd_input_transform_2x2_7x7_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
465 {
"winograd_input_transform_2x1_7x1_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
466 {
"winograd_input_transform_1x2_1x7_stepz1_nhwc",
"nhwc/winograd_input_transform.cl"},
467 {
"winograd_output_transform_4x1_3x1_nhwc",
"nhwc/winograd_output_transform.cl"},
468 {
"winograd_output_transform_1x4_1x3_nhwc",
"nhwc/winograd_output_transform.cl"},
469 {
"winograd_output_transform_4x4_3x3_nhwc",
"nhwc/winograd_output_transform.cl"},
470 {
"winograd_output_transform_4x4_5x5_nhwc",
"nhwc/winograd_output_transform.cl"},
471 {
"winograd_output_transform_4x1_5x1_nhwc",
"nhwc/winograd_output_transform.cl"},
472 {
"winograd_output_transform_1x4_1x5_nhwc",
"nhwc/winograd_output_transform.cl"},
473 {
"winograd_output_transform_2x2_7x7_nhwc",
"nhwc/winograd_output_transform.cl"},
474 {
"winograd_output_transform_2x1_7x1_nhwc",
"nhwc/winograd_output_transform.cl"},
475 {
"winograd_output_transform_1x2_1x7_nhwc",
"nhwc/winograd_output_transform.cl"},
// Static lookup table of ClKernelLibrary: each entry pairs a program/header file name (key) with
// the contents pulled in from the matching generated "*.clembed" / "*.hembed" include (value).
// The entries are listed under `#ifdef EMBEDDED_KERNELS`.
// NOTE(review): when ARM_COMPUTE_COMPRESSED_KERNELS is defined the stored values are passed through
// decode_base64() and decompress_zlib() before use, so they are presumably base64-encoded zlib data - confirm.
479 const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = {
480 #ifdef EMBEDDED_KERNELS
482 "activation_float_helpers.h",
483 #include "./cl_kernels/activation_float_helpers.hembed"
486 "activation_quant_helpers.h",
487 #include "./cl_kernels/activation_quant_helpers.hembed"
490 "common/activation_layer.cl",
491 #include "./cl_kernels/common/activation_layer.clembed"
494 "common/activation_layer_quant.cl",
495 #include "./cl_kernels/common/activation_layer_quant.clembed"
498 "common/arg_min_max.cl",
499 #include "./cl_kernels/common/arg_min_max.clembed"
502 "common/bitwise_op.cl",
503 #include "./cl_kernels/common/bitwise_op.clembed"
506 "common/bounding_box_transform.cl",
507 #include "./cl_kernels/common/bounding_box_transform.clembed"
510 "common/bounding_box_transform_quantized.cl",
511 #include "./cl_kernels/common/bounding_box_transform_quantized.clembed"
515 #include "./cl_kernels/common/col2im.clembed"
518 "common/comparisons.cl",
519 #include "./cl_kernels/common/comparisons.clembed"
522 "common/concatenate.cl",
523 #include "./cl_kernels/common/concatenate.clembed"
526 "common/convert_fc_weights.cl",
527 #include "./cl_kernels/common/convert_fc_weights.clembed"
530 "common/convolution_layer.cl",
531 #include "./cl_kernels/common/convolution_layer.clembed"
534 "common/copy_tensor.cl",
535 #include "./cl_kernels/common/copy_tensor.clembed"
538 "common/crop_tensor.cl",
539 #include "./cl_kernels/common/crop_tensor.clembed"
542 "common/deconvolution_layer.cl",
543 #include "./cl_kernels/common/deconvolution_layer.clembed"
547 #include "./cl_kernels/common/cast.clembed"
550 "common/dequantization_layer.cl",
551 #include "./cl_kernels/common/dequantization_layer.clembed"
554 "common/elementwise_operation.cl",
555 #include "./cl_kernels/common/elementwise_operation.clembed"
558 "common/elementwise_operation_quantized.cl",
559 #include "./cl_kernels/common/elementwise_operation_quantized.clembed"
562 "common/elementwise_unary.cl",
563 #include "./cl_kernels/common/elementwise_unary.clembed"
566 "common/elementwise_unary_quantized.cl",
567 #include "./cl_kernels/common/elementwise_unary_quantized.clembed"
571 #include "./cl_kernels/common/fft.clembed"
574 "common/fft_digit_reverse.cl",
575 #include "./cl_kernels/common/fft_digit_reverse.clembed"
578 "common/fft_scale.cl",
579 #include "./cl_kernels/common/fft_scale.clembed"
582 "common/fill_border.cl",
583 #include "./cl_kernels/common/fill_border.clembed"
587 #include "./cl_kernels/common/floor.clembed"
591 #include "./cl_kernels/common/gather.clembed"
595 #include "./cl_kernels/common/gemm.clembed"
598 "common/gemm_reshaped_only_rhs_mmul.cl",
599 #include "./cl_kernels/common/gemm_reshaped_only_rhs_mmul.clembed"
602 "common/gemm_utils.cl",
603 #include "./cl_kernels/common/gemm_utils.clembed"
606 "common/gemmlowp.cl",
607 #include "./cl_kernels/common/gemmlowp.clembed"
610 "common/gemmlowp_reshaped_only_rhs_mmul.cl",
611 #include "./cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.clembed"
615 #include "./cl_kernels/common/gemv.clembed"
618 "common/generate_proposals.cl",
619 #include "./cl_kernels/common/generate_proposals.clembed"
622 "common/generate_proposals_quantized.cl",
623 #include "./cl_kernels/common/generate_proposals_quantized.clembed"
627 #include "./cl_kernels/gemm_helpers.hembed"
631 #include "./cl_kernels/helpers.hembed"
635 #include "./cl_kernels/helpers_asymm.hembed"
639 #include "./cl_kernels/repeat.hembed"
643 #include "./cl_kernels/tile_helpers.hembed"
646 "common/instance_normalization.cl",
647 #include "./cl_kernels/common/instance_normalization.clembed"
650 "common/l2_normalize.cl",
651 #include "./cl_kernels/common/l2_normalize.clembed"
654 "common/mean_stddev_normalization.cl",
655 #include "./cl_kernels/common/mean_stddev_normalization.clembed"
659 #include "./cl_kernels/common/memset.clembed"
662 "common/minmax_layer.cl",
663 #include "./cl_kernels/common/minmax_layer.clembed"
667 #include "./cl_kernels/common/nonmax.clembed"
670 "common/batchnormalization_layer.cl",
671 #include "./cl_kernels/common/batchnormalization_layer.clembed"
674 "common/pad_layer.cl",
675 #include "./cl_kernels/common/pad_layer.clembed"
679 #include "./cl_kernels/common/permute.clembed"
682 "common/pixelwise_mul_float.cl",
683 #include "./cl_kernels/common/pixelwise_mul_float.clembed"
686 "common/pixelwise_mul_int.cl",
687 #include "./cl_kernels/common/pixelwise_mul_int.clembed"
690 "common/qlstm_layer_normalization.cl",
691 #include "./cl_kernels/common/qlstm_layer_normalization.clembed"
694 "common/quantization_layer.cl",
695 #include "./cl_kernels/common/quantization_layer.clembed"
699 #include "./cl_kernels/common/range.clembed"
702 "common/reduction_operation.cl",
703 #include "./cl_kernels/common/reduction_operation.clembed"
706 "common/reshape_layer.cl",
707 #include "./cl_kernels/common/reshape_layer.clembed"
711 #include "./cl_kernels/common/reverse.clembed"
714 "common/roi_align_layer.cl",
715 #include "./cl_kernels/common/roi_align_layer.clembed"
718 "common/roi_align_layer_quantized.cl",
719 #include "./cl_kernels/common/roi_align_layer_quantized.clembed"
722 "common/roi_pooling_layer.cl",
723 #include "./cl_kernels/common/roi_pooling_layer.clembed"
727 #include "./cl_kernels/common/select.clembed"
730 "common/softmax_layer.cl",
731 #include "./cl_kernels/common/softmax_layer.clembed"
734 "common/slice_ops.cl",
735 #include "./cl_kernels/common/slice_ops.clembed"
738 "common/stack_layer.cl",
739 #include "./cl_kernels/common/stack_layer.clembed"
743 #include "./cl_kernels/common/tile.clembed"
746 "common/transpose.cl",
747 #include "./cl_kernels/common/transpose.clembed"
751 #include "./cl_kernels/types.hembed"
754 "common/unpooling_layer.cl",
755 #include "./cl_kernels/common/unpooling_layer.clembed"
759 #include "./cl_kernels/common/mat_mul.clembed"
762 "common/mat_mul_mmul.cl",
763 #include "./cl_kernels/common/mat_mul_mmul.clembed"
766 "common/mat_mul_quantized.cl",
767 #include "./cl_kernels/common/mat_mul_quantized.clembed"
770 "common/mat_mul_quantized_mmul.cl",
771 #include "./cl_kernels/common/mat_mul_quantized_mmul.clembed"
773 #ifdef ENABLE_NCHW_KERNELS
775 "nchw/batch_to_space.cl",
776 #include "./cl_kernels/nchw/batch_to_space.clembed"
779 "nchw/channel_shuffle.cl",
780 #include "./cl_kernels/nchw/channel_shuffle.clembed"
783 "nchw/upsample_layer.cl",
784 #include "./cl_kernels/nchw/upsample_layer.clembed"
787 "nchw/depth_to_space.cl",
788 #include "./cl_kernels/nchw/depth_to_space.clembed"
791 "nchw/dequantization_layer.cl",
792 #include "./cl_kernels/nchw/dequantization_layer.clembed"
795 "nchw/direct_convolution.cl",
796 #include "./cl_kernels/nchw/direct_convolution.clembed"
800 #include "./cl_kernels/nchw/im2col.clembed"
803 "nchw/normalization_layer.cl",
804 #include "./cl_kernels/nchw/normalization_layer.clembed"
807 "nchw/normalize_planar_yuv_layer.cl",
808 #include "./cl_kernels/nchw/normalize_planar_yuv_layer.clembed"
811 "nchw/normalize_planar_yuv_layer_quantized.cl",
812 #include "./cl_kernels/nchw/normalize_planar_yuv_layer_quantized.clembed"
815 "nchw/batchnormalization_layer.cl",
816 #include "./cl_kernels/nchw/batchnormalization_layer.clembed"
819 "nchw/pooling_layer.cl",
820 #include "./cl_kernels/nchw/pooling_layer.clembed"
823 "nchw/prior_box_layer.cl",
824 #include "./cl_kernels/nchw/prior_box_layer.clembed"
827 "nchw/reorg_layer.cl",
828 #include "./cl_kernels/nchw/reorg_layer.clembed"
832 #include "./cl_kernels/nchw/scale.clembed"
835 "nchw/space_to_batch.cl",
836 #include "./cl_kernels/nchw/space_to_batch.clembed"
839 "nchw/space_to_depth.cl",
840 #include "./cl_kernels/nchw/space_to_depth.clembed"
843 "nchw/winograd_filter_transform.cl",
844 #include "./cl_kernels/nchw/winograd_filter_transform.clembed"
847 "nchw/winograd_input_transform.cl",
848 #include "./cl_kernels/nchw/winograd_input_transform.clembed"
851 "nchw/winograd_output_transform.cl",
852 #include "./cl_kernels/nchw/winograd_output_transform.clembed"
856 #ifdef ENABLE_NHWC_KERNELS
858 "nhwc/batch_to_space.cl",
859 #include "./cl_kernels/nhwc/batch_to_space.clembed"
862 "nhwc/channel_shuffle.cl",
863 #include "./cl_kernels/nhwc/channel_shuffle.clembed"
866 "nhwc/upsample_layer.cl",
867 #include "./cl_kernels/nhwc/upsample_layer.clembed"
870 "nhwc/depth_to_space.cl",
871 #include "./cl_kernels/nhwc/depth_to_space.clembed"
874 "nhwc/dequantization_layer.cl",
875 #include "./cl_kernels/nhwc/dequantization_layer.clembed"
878 "nhwc/direct_convolution.cl",
879 #include "./cl_kernels/nhwc/direct_convolution.clembed"
882 "nhwc/direct_convolution3d.cl",
883 #include "./cl_kernels/nhwc/direct_convolution3d.clembed"
886 "nhwc/dwc_native_fp_nhwc.cl",
887 #include "./cl_kernels/nhwc/dwc_native_fp_nhwc.clembed"
890 "nhwc/dwc_native_quantized_nhwc.cl",
891 #include "./cl_kernels/nhwc/dwc_native_quantized_nhwc.clembed"
894 "nhwc/normalization_layer.cl",
895 #include "./cl_kernels/nhwc/normalization_layer.clembed"
898 "nhwc/normalize_planar_yuv_layer.cl",
899 #include "./cl_kernels/nhwc/normalize_planar_yuv_layer.clembed"
902 "nhwc/normalize_planar_yuv_layer_quantized.cl",
903 #include "./cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.clembed"
907 #include "./cl_kernels/nhwc/im2col.clembed"
910 "nhwc/indirect_convolution.cl",
911 #include "./cl_kernels/nhwc/indirect_convolution.clembed"
914 "nhwc/batchnormalization_layer.cl",
915 #include "./cl_kernels/nhwc/batchnormalization_layer.clembed"
918 "nhwc/pooling_layer.cl",
919 #include "./cl_kernels/nhwc/pooling_layer.clembed"
922 "nhwc/pooling_3d_layer.cl",
923 #include "./cl_kernels/nhwc/pooling_3d_layer.clembed"
926 "nhwc/pooling_3d_layer_quantized.cl",
927 #include "./cl_kernels/nhwc/pooling_3d_layer_quantized.clembed"
930 "nhwc/pooling_layer_quantized.cl",
931 #include "./cl_kernels/nhwc/pooling_layer_quantized.clembed"
934 "nhwc/reorg_layer.cl",
935 #include "./cl_kernels/nhwc/reorg_layer.clembed"
939 #include "./cl_kernels/nhwc/scale.clembed"
942 "nhwc/space_to_batch.cl",
943 #include "./cl_kernels/nhwc/space_to_batch.clembed"
946 "nhwc/space_to_depth.cl",
947 #include "./cl_kernels/nhwc/space_to_depth.clembed"
950 "nhwc/transposed_convolution.cl",
951 #include "./cl_kernels/nhwc/transposed_convolution.clembed"
954 "nhwc/winograd_filter_transform.cl",
955 #include "./cl_kernels/nhwc/winograd_filter_transform.clembed"
958 "nhwc/winograd_input_transform.cl",
959 #include "./cl_kernels/nhwc/winograd_input_transform.clembed"
962 "nhwc/winograd_output_transform.cl",
963 #include "./cl_kernels/nhwc/winograd_output_transform.clembed"
972 return _kernel_library;
// Look up the entry registered for kernel_name in the kernel -> program table.
978 auto kernel_program_it = _kernel_program_map.find(
kernel_name);
// Taken when the table has no entry for kernel_name (the handling itself is not visible in this excerpt).
980 if (_kernel_program_map.end() == kernel_program_it)
// Copy out the mapped value, i.e. the program file name associated with the kernel.
985 const std::string
program_name = kernel_program_it->second;
// Embedded-kernel path: the program source is taken from the tables compiled into the library.
1003 #ifdef EMBEDDED_KERNELS
1004 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
// Compressed build: first check the cache of sources that have already been decompressed.
1005 const auto inflatted_program_source_it = _decompressed_source_map.find(
program_name);
1006 if (inflatted_program_source_it != _decompressed_source_map.end())
// Cache hit: return the cached text; the second field is false (the file-based path below
// keeps a local named is_binary, so this presumably means "not a binary" - confirm).
1008 return ClProgramInfo{inflatted_program_source_it->second,
false};
// Look up the embedded entry for program_name.
1012 const auto program_source_it = _program_source_map.find(
program_name);
// Taken when no embedded source exists for program_name (the handling is not visible in this excerpt).
1013 if (program_source_it == _program_source_map.end())
// Start from a copy of the stored value.
1017 std::string program_source = program_source_it->second;
1019 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
// Compressed build: the stored value is base64-decoded and then zlib-inflated.
1020 std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
// Remember the inflated text under program_name so later requests hit the cache lookup above.
1022 _decompressed_source_map.insert(std::make_pair(
program_name, decompressed_program_source));
// Replace the copied value with the inflated text.
1023 program_source = std::move(decompressed_program_source);
// File-based path: a binary file is tried first, then the source file.
// The binary file name is source_name with "bin" appended.
1030 std::string binary_name = source_name +
"bin";
1031 std::string program_source{};
1032 bool is_binary =
false;
// Use the binary file when it can be opened; read_file is called with true as its second argument.
1034 if (std::ifstream(binary_name).is_open())
1036 program_source =
read_file(binary_name,
true);
// Otherwise fall back to the source file; read_file is called with false as its second argument.
1039 else if (std::ifstream(source_name).is_open())
1041 program_source =
read_file(source_name,
false);