// NOTE(review): this file is a fragmented extraction (interior lines are
// missing between the embedded original line numbers), so comments describe
// only what the visible code shows.
//
// Collapses an arbitrary-rank source shape into the 3D layout expected by the
// batched MatMul path: width kept as-is, height of 1, the original height
// moved to the batch-row slot, and every dimension from index 2 upward folded
// into a single trailing batch dimension via collapsed_from(2).
58 inline TensorShape get_reshaped_matmul_tensor(
const TensorShape &
src)
60 return TensorShape(
src.x(), 1,
src.y(),
src.collapsed_from(2).z());
// Fills @p gemmlowp_output_stage with the requantization parameters needed to
// convert the int32 GEMMLowp accumulator back to the quantized output type.
// Derived from the uniform quantization info of src, weights and dst, plus the
// optional fused activation. (Several lines are elided in this view — e.g. the
// output_shift computation and the activation min/max derivation — so the
// comments below are limited to the visible statements.)
63 Status construct_gemmlowp_output_stage(
const ITensorInfo &
src,
64 const ITensorInfo &weights,
65 const ITensorInfo &
dst,
66 GEMMLowpOutputStageInfo &gemmlowp_output_stage,
67 ActivationLayerInfo activation_info)
// Start from neutral stage parameters; the quantized branch overwrites them.
70 gemmlowp_output_stage.gemmlowp_offset = 0;
71 gemmlowp_output_stage.gemmlowp_multiplier = 0;
72 gemmlowp_output_stage.gemmlowp_shift = 0;
79 const QuantizationInfo oq_info =
dst.quantization_info();
80 const UniformQuantizationInfo iq_unif =
src.quantization_info().uniform();
81 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
82 const UniformQuantizationInfo oq_unif = oq_info.uniform();
// If dst has not been initialized yet (total_size() == 0), fall back to the
// input's uniform quantization info as the output quantization.
84 const auto output_quant_info = (
dst.total_size() == 0) ? iq_unif : oq_unif;
// Real-valued rescale factor; the elided code presumably decomposes it into
// the fixed-point (output_multiplier, output_shift) pair — TODO confirm.
86 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
87 int output_multiplier = 0;
// Saturation bounds for the requantized output; tightened below when an
// activation is fused (the tie target is elided in this view).
92 PixelValue type_min{};
93 PixelValue type_max{};
96 if (activation_info.enabled())
98 std::tie(type_min, type_max) =
// Publish the computed stage parameters, including the per-channel vectors
// (single entry here, i.e. per-tensor quantization).
103 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
104 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
105 gemmlowp_output_stage.gemmlowp_shift = output_shift;
106 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
107 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
108 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
109 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
// Static validation of the matrix-multiply stage of a fully connected layer.
// Dispatches between three visible backends: the MatMul-native kernels
// (use_matmul path), ClGemmLowpMatrixMultiplyCore (quantized path), and —
// presumably, in elided lines — ClGemm for the float path. (Many lines are
// missing from this view; comments cover only what is shown.)
115 Status validate_mm(
const ITensorInfo &
src,
116 const ITensorInfo &weights,
117 const ITensorInfo *
bias,
118 const ITensorInfo &
dst,
119 const FullyConnectedLayerInfo &fc_info,
// Weights only need transposing when the caller has not already reshaped them.
123 const bool transpose_weights = fc_info.transpose_weights ? !fc_info.are_weights_reshaped :
false;
// Dynamic (non-constant) weights that still need transposing force the
// dynamic-GEMM path instead of MatMul.
124 const bool use_dynamic_gemm =
125 !use_matmul && !weights.are_values_constant() && transpose_weights;
// MatMul path: reshape the LHS into the batched 3D matmul layout, then let
// the kernel-config heuristic pick a MatMulKernelInfo and validate the
// (lowp or float — the selection line is elided) native kernel.
130 const MatMulInfo m_info = MatMulInfo().adj_rhs(transpose_weights);
133 TensorInfo lhs_to_use =
src.clone()->set_tensor_shape(get_reshaped_matmul_tensor(
src.tensor_shape()));
136 std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig>
t =
138 const MatMulKernelInfo kernel_info =
t->configure(&lhs_to_use, &weights, m_info);
141 kernel_info, fc_info.activation_info)
142 : kernels::ClMatMulNativeKernel::
validate(&lhs_to_use, &weights,
bias, &
dst, kernel_info,
143 fc_info.activation_info);
// GEMM path: build the output stage and GEMMInfo, then validate the core.
147 GEMMLowpOutputStageInfo gemmlowp_output_stage;
149 construct_gemmlowp_output_stage(
src, weights,
dst, gemmlowp_output_stage, fc_info.activation_info));
151 const GEMMInfo &gemm_info = GEMMInfo(
false,
156 fc_info.retain_internal_weights,
157 gemmlowp_output_stage,
158 fc_info.fp_mixed_precision,
161 ActivationLayerInfo());
// Quantized path: GEMMLowp assumes symmetric offsets, so validate with
// negated offsets on cloned src/weights infos (the kernels add the offset
// back internally — NOTE(review): inferred from the negation; confirm).
165 const UniformQuantizationInfo iq_info =
src.quantization_info().uniform();
166 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
170 const QuantizationInfo src_quantization_info(iq_info.scale, -iq_info.offset);
171 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
175 &
src.clone()->set_quantization_info(src_quantization_info),
176 &weights.clone()->set_quantization_info(weights_quantization_info),
bias, &
dst, gemm_info));
// Constructor member-initializer fragment (the signature is elided from this
// view): all operator/kernel pointers start null and are lazily created in
// configure() depending on the selected path.
189 : _convert_weights(nullptr),
191 _reshape_weights(nullptr),
193 _mm_gemmlowp(nullptr),
194 _matmul_native_kernel(nullptr),
195 _matmul_lowp_native_kernel(nullptr),
// Configures the matrix-multiply stage, mirroring validate_mm(): the MatMul
// native kernels (lowp or float), ClGemmLowpMatrixMultiplyCore, or ClGemm.
// (Interior lines are elided in this view; comments cover visible code only.)
202 void ClFullyConnected::configure_mm(
const CLCompileContext &compile_context,
// MatMul path: reshape the LHS into the batched 3D matmul layout.
217 _lhs_to_use =
src->clone()->set_tensor_shape(get_reshaped_matmul_tensor(_lhs_to_use.
tensor_shape()));
221 std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig> kernel_config =
// Quantized MatMul kernel.
228 _matmul_lowp_native_kernel = std::make_unique<kernels::ClMatMulLowpNativeKernel>();
229 _matmul_lowp_native_kernel->set_target(gpu_target);
230 _matmul_lowp_native_kernel->configure(compile_context,
src, weights,
bias,
dst, kernel_info,
// Float MatMul kernel.
235 _matmul_native_kernel = std::make_unique<kernels::ClMatMulNativeKernel>();
236 _matmul_native_kernel->set_target(gpu_target);
237 _matmul_native_kernel->configure(compile_context,
src, weights,
bias,
dst, kernel_info,
// GEMM path: derive the requantization stage and GEMMInfo.
244 GEMMLowpOutputStageInfo gemmlowp_output_stage;
245 construct_gemmlowp_output_stage(*
src, *weights, *
dst, gemmlowp_output_stage, fc_info.
activation_info);
247 const GEMMInfo &gemm_info = GEMMInfo(
false,
253 gemmlowp_output_stage,
// Quantized path: clone src/weights infos and negate their offsets before
// configuring GEMMLowp (same offset convention as validate_mm()).
263 const QuantizationInfo src_quantization_info =
src->quantization_info();
266 TensorInfo
src_info =
src->clone()->set_quantization_info(src_quantization_info);
267 TensorInfo
weights_info = weights->
clone()->set_quantization_info(weights_quantization_info);
270 QuantizationInfo(src_quantization_info.uniform().scale, -src_quantization_info.uniform().offset));
271 weights_info.set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale,
272 -weights_quantization_info.uniform().offset));
275 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
// Float path: plain ClGemm with unit alpha/beta.
281 _mm_gemm = std::make_unique<ClGemm>();
282 _mm_gemm->configure(compile_context,
src, weights,
bias,
dst, 1.f, 1.f, gemm_info);
// FC layer fed by a convolution output: the 3D activation (W*H*C) must be
// flattened to a 1D vector per batch before the matrix multiply.
// (Interior lines — e.g. the assertion this product check belongs to — are
// elided in this view.)
287 void ClFullyConnected::configure_conv_fc(
const CLCompileContext &compile_context,
289 ITensorInfo *weights,
292 const FullyConnectedLayerInfo &fc_info)
// Visible fragment of a consistency check: weights must match W*H*C of src.
296 (
src->dimension(0) *
src->dimension(1) *
src->dimension(2))));
// Intermediate flattened tensor, cloned from src so type/quantization match.
301 _flattened_src =
src->clone()
302 ->set_is_resizable(
true)
308 _flatten = std::make_unique<ClFlatten>();
309 _flatten->configure(compile_context,
src, &_flattened_src);
// Run the MM stage on the flattened input.
313 configure_mm(compile_context, &_flattened_src, weights,
bias,
dst, fc_info);
// FC layer fed by another FC layer: the input is already 2D, so no flatten
// step is needed — forward directly to configure_mm().
316 void ClFullyConnected::configure_fc_fc(
const CLCompileContext &compile_context,
318 ITensorInfo *weights,
321 const FullyConnectedLayerInfo &fc_info)
327 configure_mm(compile_context,
src, weights,
bias,
dst, fc_info);
// Fragment of ClFullyConnected::configure(): decides conv-fed vs fc-fed
// input, optionally transposes/converts weights, dispatches to the right
// configure_* helper, and registers auxiliary workspace requirements.
// (Many lines are elided in this view; comments cover visible code only.)
345 _is_fc_after_conv =
true;
// Heuristic: a dst with >1 rows means batched FC; then src is conv-fed only
// if its trailing dims line up with dst's batch dims (std::equal below).
356 const bool is_batched_fc_layer =
dst->dimension(1) > 1;
368 if (is_batched_fc_layer)
371 (std::equal(
src->tensor_shape().cbegin() + 3,
src->tensor_shape().cend(),
372 dst->tensor_shape().cbegin() + 1));
376 _is_fc_after_conv =
src->num_dimensions() > 1;
// Weight transpose is handled by a separate ClTranspose pass only on the
// non-MatMul path (MatMul fuses it via adj_rhs).
382 if (_transpose_weights && !_use_matmul)
385 _reshape_weights = std::make_unique<ClTranspose>();
386 _reshape_weights->configure(compile_context, weights, &_reshaped_weights);
387 weights_used = &_reshaped_weights;
// Convert weights between NCHW/NHWC layouts when the FC follows a conv
// trained in a different layout (condition elided in this view).
395 _convert_weights = std::make_unique<ClConvertFullyConnectedWeights>();
396 _convert_weights->configure(compile_context, weights_used, &_converted_weights,
src->tensor_shape(),
399 weights_used = &_converted_weights;
401 _run_convert_weights =
true;
404 if (_is_fc_after_conv)
407 configure_conv_fc(compile_context,
src, weights_used, biases,
dst, fc_info);
412 configure_fc_fc(compile_context,
src, weights_used, biases,
dst, fc_info);
415 _weights_to_use = *weights_used;
// Auxiliary memory bookkeeping: adopt the GEMM/GEMMLowp workspace slots,
// then set lifetimes for the weight-transform and flatten scratch tensors.
420 _aux_mem[ConvertedWeights] =
426 auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
427 for (
unsigned int i = 0; i < gemm_mem_req.size(); ++i)
429 _aux_mem[i] = gemm_mem_req[i];
431 if (_aux_mem[1].size > 0 || _aux_mem[2].size > 0)
// A transformed-weights buffer that is the one actually consumed must
// persist across runs; dynamic GEMM re-creates it, so Temporary suffices.
445 const auto transposed_wei_lft = (_weights_to_use_idx ==
offset_int_vec(TransposedWeights))
446 ? MemoryLifetime::Persistent
448 const auto converted_wei_lft = (_weights_to_use_idx ==
offset_int_vec(ConvertedWeights))
449 ? MemoryLifetime::Persistent
453 _dynamic_gemm ? MemoryLifetime::Temporary : transposed_wei_lft,
456 _dynamic_gemm ? MemoryLifetime::Temporary : converted_wei_lft,
460 _aux_mem[FlattenedSrc] =
// Fragment of ClFullyConnected::validate(): static counterpart of
// configure(), mirroring its conv-fed/fc-fed decision and weight-transform
// selection before delegating to validate_mm(). (Many lines are elided in
// this view; comments cover visible code only.)
483 bool is_fc_after_conv =
true;
487 const bool is_batched_fc_layer =
dst->dimension(1) > 1;
489 !is_batched_fc_layer &&
493 ->set_is_resizable(
true)
// Converted-weights info is only materialized on the non-MatMul transpose
// path; otherwise a resizable clone of the incoming weights is used.
499 const ITensorInfo &converted_weights = (transpose_weights && !use_matmul)
501 :
TensorInfo(weights->
clone()->set_is_resizable(
true).reset_padding());
512 if (biases !=
nullptr)
// Same conv-fed heuristic as configure(): trailing src dims must match
// dst's batch dims when batched.
526 if (is_batched_fc_layer)
529 (std::equal(
src->tensor_shape().cbegin() + 3,
src->tensor_shape().cend(),
530 dst->tensor_shape().cbegin() + 1));
534 is_fc_after_conv =
src->num_dimensions() > 1;
538 if (transpose_weights && !use_matmul)
542 weights_to_use = &reshaped_weights;
550 weights_to_use = &converted_weights;
553 if (is_fc_after_conv)
// On the MatMul path transposed weights keep their original orientation,
// so the W*H*C check reads dimension 0 instead of 1.
557 const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;
559 (weights_to_use->
dimension(weight_idx) != (
src->dimension(0) *
src->dimension(1) *
src->dimension(2))));
563 src_to_use = &flatten_src;
569 const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;
// Fragment of ClFullyConnected::run(): flattens conv-fed input, then runs the
// selected MM backend (GEMMLowp or GEMM; the MatMul branch is elided in this
// view). The ASSERTS_ENABLED region's contents are also elided.
583 #ifdef ARM_COMPUTE_ASSERTS_ENABLED
586 #endif // ARM_COMPUTE_ASSERTS_ENABLED
594 if (_is_fc_after_conv)
597 _flatten->run(flatten_pack);
625 _mm_gemmlowp->run(gemm_pack);
629 _mm_gemm->run(gemm_pack);
// Fragment of ClFullyConnected::prepare(): one-time (or, for dynamic GEMM,
// per-run) weight preparation — transpose, layout conversion, then backend
// prepare. (Interior lines are elided in this view.)
637 if (!_is_prepared || _dynamic_gemm)
639 #ifdef ARM_COMPUTE_ASSERTS_ENABLED
// Debug-only counter tracking how many times prepare work actually ran.
640 ++_asrt_prepare_count;
642 #endif // ARM_COMPUTE_ASSERTS_ENABLED
650 const ITensor *cur_weights = weights;
// Stage 1: transpose weights (non-MatMul path only), chaining cur_weights.
653 if (_transpose_weights && !_use_matmul)
657 _reshape_weights->run(transpose_pack);
660 cur_weights = reshaped_weights.
get();
// Stage 2: optional NCHW/NHWC weight conversion, again chaining cur_weights.
664 if (_run_convert_weights)
667 _convert_weights->run(convert_pack);
670 cur_weights = converted_weights.
get();
// Stage 3: delegate to the selected backend's own prepare().
677 if (_dynamic_gemm || !_use_matmul)
681 _mm_gemm->prepare(gemm_pack);
685 _mm_gemmlowp->prepare(gemm_pack);