50 using namespace experimental;
51 using namespace misc::shape_calculator;
52 using namespace utils::cast;
// NOTE(review): fragment of the ClGemmConv2d constructor's member-initializer
// list — interior initializers are elided in this excerpt, and the leading
// numbers on each line are original-file line markers from the extraction,
// not code. All kernel/operator members start as nullptr (they are created
// with std::make_unique later, in configure()), activation fusing defaults
// to enabled, and the auxiliary-memory descriptor vector is sized to
// AuxTensorIdx::Count so workspace slots can be indexed by enum value.
56 : _weights_reshape_kernel(nullptr),
57 _im2col_kernel(nullptr),
59 _mm_gemmlowp(nullptr),
60 _col2im_kernel(nullptr),
61 _activation_kernel(nullptr),
68 _fuse_activation(true),
71 _aux_mem(AuxTensorIdx::Count)
// NOTE(review): fragment of ClGemmConv2d::configure_mm — the function header
// and several interior lines are elided in this excerpt (leading numbers are
// original-file line markers, not code).
// First, assert the matrix-multiply configuration is valid before building it.
87 validate_mm(
src, weights, biases,
dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col,
act_info));
95 gemmlowp_output_stage,
// Quantized path below re-tags the input with adjusted quantization info
// (tmp_src) before handing it to the GEMMLowp core — TODO confirm the exact
// QuantizationInfo constructed here; the argument lines are elided.
110 tmp_src.set_quantization_info(
// Quantized branch: run the convolution's matmul through GEMMLowp.
115 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
116 _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases,
dst, gemm_info);
// Propagate the GEMMLowp operator's workspace requirements into this
// operator's own auxiliary-memory table, slot by slot.
121 auto mm_mem_req = _mm_gemmlowp->workspace();
122 for (
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
124 _aux_mem[cont] = mm_mem_req[cont];
// Non-quantized branch: plain float GEMM with alpha = beta = 1.0f.
130 _mm_gemm = std::make_unique<ClGemm>();
131 _mm_gemm->configure(compile_context, &tmp_src, weights, biases,
dst, 1.0f, 1.0f, gemm_info);
// Same workspace propagation as the quantized branch.
132 auto mm_mem_req = _mm_gemm->workspace();
133 for (
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
135 _aux_mem[cont] = mm_mem_req[cont];
// NOTE(review): fragment of ClGemmConv2d::validate_mm — parts of the
// parameter list, the GEMMInfo construction, and the function tail are
// elided in this excerpt (leading numbers are original-file line markers,
// not code). Static validation of the GEMM/GEMMLowp configuration used by
// configure_mm; returns a Status.
140 Status ClGemmConv2d::validate_mm(
const ITensorInfo *
src,
141 const ITensorInfo *weights,
142 const ITensorInfo *biases,
143 const ITensorInfo *
dst,
144 const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
147 const ActivationLayerInfo &
act_info)
// Build the GEMMInfo mirroring what configure_mm passes to the operator
// (remaining constructor arguments elided in this excerpt).
151 const GEMMInfo &gemm_info = GEMMInfo(
false,
157 gemmlowp_output_stage,
// Quantized path: clone src/weights so their quantization info can be
// adjusted without mutating the caller's tensor infos.
168 const QuantizationInfo input_quantization_info =
src->quantization_info();
169 const QuantizationInfo weights_quantization_info = weights->quantization_info();
171 std::unique_ptr<ITensorInfo> src_qa =
src->clone();
172 std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
// NOTE(review): offsets are negated on the clones — presumably to match the
// offset-sign convention expected by ClGemmLowpMatrixMultiplyCore's
// validation; confirm against that operator's contract.
173 src_qa->set_quantization_info(
174 QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
175 weights_qa->set_quantization_info(
176 QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// NOTE(review): fragment of ClGemmConv2d::configure — the function header and
// many interior lines are elided in this excerpt (leading numbers are
// original-file line markers, not code). Pipeline visible here:
// weights reshape -> (optional) im2col -> GEMM/GEMMLowp -> (optional) col2im
// -> (optional) standalone activation, plus auxiliary-memory bookkeeping.
209 const unsigned int num_kernels = weights->
dimension(idx_kernels);
// Start optimistic: assume the activation can be fused into the matmul;
// cleared below if the quantized activation is not a supported fusion.
221 _fuse_activation =
true;
227 unsigned int stride_x = 0;
228 unsigned int stride_y = 0;
232 unsigned int conv_w = 0;
233 unsigned int conv_h = 0;
// Each group multiplies against its own slice of the kernels.
237 unsigned int mat_weights_cols = num_kernels / conv2d_info.
num_groups;
240 _append_bias =
false;
242 _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>();
// Grouped convolution cannot fold the bias into the reshaped weights, so the
// bias is withheld here (passed to the reshape kernel separately below).
243 if (conv2d_info.
num_groups != 1 && biases !=
nullptr)
247 biases_to_use =
nullptr;
249 _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped,
// Non-grouped path: reshape weights without a bias tensor.
254 _weights_reshape_kernel->configure(compile_context, weights,
nullptr, &_weights_reshaped,
// im2col lowers the convolution input into a matrix for the GEMM.
262 _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>();
266 _im2col_kernel->configure(compile_context,
src, &_im2col_output,
Size2D(kernel_width, kernel_height),
274 gemm_input_to_use = &_im2col_output;
// Intermediate GEMM output shape: [mat_weights_cols, conv_w * conv_h, ...].
284 shape_gemm.
set(0, mat_weights_cols);
285 shape_gemm.
set(1, conv_w * conv_h);
291 gemm_output_to_use = &_gemm_output;
// If dst has no size yet, fall back to the input quantization info.
301 const auto output_quant_info = (
dst->total_size() == 0) ? iq_info : oq_info;
303 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
// Quantized activation bounds, clamped/derived below (elided).
319 auto min_activation = min_val.get<int32_t>();
320 auto max_activation = max_val.get<int32_t>();
// Only these activations can be folded into the GEMMLowp output stage.
322 const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = {
323 ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
324 ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU};
330 std::tie(min_activation, max_activation) =
// Unsupported activation: run it as a separate kernel after the matmul.
335 _fuse_activation =
false;
// Dispatch the matrix multiply (GEMM or GEMMLowp) on the prepared operands.
349 configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use,
350 gemmlowp_output_stage, gemm_3d_depth, conv2d_info.
act_info);
// col2im restores the GEMM output matrix to the convolution output layout.
355 _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
358 _col2im_kernel->configure(compile_context, gemm_output_to_use,
dst,
Size2D(conv_w, conv_h),
364 "Output shape does not match the expected one");
// Standalone activation kernel, only when fusion was not possible.
366 if (!_fuse_activation)
368 _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>();
369 _activation_kernel->configure(compile_context,
dst,
nullptr, conv2d_info.
act_info);
// Register the intermediate tensors as auxiliary workspace slots.
372 _aux_mem[Im2ColOutput] =
374 _aux_mem[WeightsReshaped] =
// NOTE(review): fragment of ClGemmConv2d::validate — the function header and
// many interior lines are elided in this excerpt (leading numbers are
// original-file line markers, not code). This mirrors configure(): it
// statically checks each pipeline stage (weights reshape, im2col, GEMM,
// activation fusion) without allocating anything.
392 if (!is_quantized_per_channel)
// Grouping restrictions: not supported with NHWC, nor with QASYMM8.
398 "Grouping (num_groups != 1) with NHWC data layout is not supported");
400 "Grouping (num_groups != 1) is not supported with QASYMM8");
413 const unsigned int num_kernels = weights->
dimension(idx_kernels);
// Local mirror of configure()'s _fuse_activation flag.
425 bool fuse_activation =
true;
428 src->dimension(idx_channel));
432 if (biases !=
nullptr)
452 unsigned int conv_w = 0;
453 unsigned int conv_h = 0;
// Per-group slice of the kernels, as in configure().
458 unsigned int mat_weights_cols = num_kernels / conv2d_info.
num_groups;
461 bool append_bias =
false;
// Grouped convolution + bias: bias validated separately, not folded into the
// reshaped weights (same rule as configure()).
463 if (conv2d_info.
num_groups != 1 && biases !=
nullptr)
467 biases_to_use =
nullptr;
469 weights_reshaped_info =
474 weights_reshaped_info =
478 weights_to_use = &weights_reshaped_info;
482 const Size2D kernel_dims(kernel_width, kernel_height);
494 gemm_input_to_use = &im2col_reshaped_info;
// Expected intermediate GEMM output shape, as in configure().
503 shape_gemm.
set(0, mat_weights_cols);
504 shape_gemm.
set(1, conv_w * conv_h);
507 info_gemm.set_quantization_info(
dst->quantization_info()).set_data_layout(
src->data_layout());
508 gemm_output_to_use = &info_gemm;
// If dst has no size yet, fall back to the input quantization info.
520 const auto output_quant_info = (
dst->total_size() == 0) ? iq_info : oq_info;
521 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
531 int min_activation = 0;
532 int max_activation = 0;
// Only these activations can be folded into the GEMMLowp output stage.
534 const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = {
535 ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
536 ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU};
542 std::tie(min_activation, max_activation) =
// Unsupported activation: it would run as a separate kernel at execution.
547 fuse_activation =
false;
// Validate the matmul stage with the same arguments configure_mm would use.
561 gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.
act_info));
571 if (!fuse_activation)
// NOTE(review): fragment of ClGemmConv2d::run — the function header and
// interior lines are elided in this excerpt (leading numbers are
// original-file line markers, not code). Execution mirrors configure():
// default to running the matmul directly on src/dst, rerouting through the
// im2col / col2im workspace tensors when those stages are active.
586 auto gemm_input_to_use =
src;
587 auto gemm_output_to_use =
dst;
// im2col active: the matmul reads the lowered matrix instead of src.
598 gemm_input_to_use = im2col_output.
get();
// col2im active: the matmul writes the intermediate matrix, not dst.
602 gemm_output_to_use = gemm_output.
get();
// Dispatch to the quantized or float matmul operator.
616 _mm_gemmlowp->run(pack_mm);
621 _mm_gemm->run(pack_mm);
// Standalone activation runs only when fusion was disabled in configure().
632 if (!_fuse_activation)
// NOTE(review): fragment of ClGemmConv2d::prepare (surrounding lines elided;
// the leading number is an original-file line marker, not code). Forwards
// one-time preparation (e.g. weights transformation) to whichever matmul
// operator was configured: GEMMLowp for quantized, plain GEMM otherwise.
659 _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);