Compute Library 21.05
CLGEMMConvolutionLayer.cpp
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "support/Cast.h"

#include <cmath>
#include <memory>
#include <set>
#include <tuple>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;

CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
    : _weights_reshape_kernel(std::make_unique<CLWeightsReshapeKernel>())
{
}

CLConvolutionLayerReshapeWeights::~CLConvolutionLayerReshapeWeights() = default;

void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
}
70 
71 void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
72 {
73  // Perform validation step
74  ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
76  (biases != nullptr) ? biases->info() : nullptr,
77  output->info(),
78  num_groups));
79 
80  const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
81  const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;
82 
83  _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups);
84 
85  output->info()->set_quantization_info(weights->info()->quantization_info());
86 }
87 
Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        const int idx_kernels = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
        ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(weights->data_type()));
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
        ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, output, num_groups));
    }

    return Status{};
}

void CLConvolutionLayerReshapeWeights::run()
{
    CLScheduler::get().enqueue(*_weights_reshape_kernel);
}
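
// Illustrative note (not part of the original source): CLWeightsReshapeKernel flattens each
// [kernel_w, kernel_h, channels] filter of the 4D weights tensor into, roughly, one column of a
// 2D matrix with one column per kernel, so that the convolution can be computed as a single
// matrix multiply against the im2col-ed input. When append_biases is set, the bias values are
// appended as one extra element per kernel.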

CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(std::make_unique<CLIm2ColKernel>()),
      _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(std::make_unique<CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr),
      _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}

CLGEMMConvolutionLayer::~CLGEMMConvolutionLayer() = default;

void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                          const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                          int gemm_3d_depth, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(_is_quantized)
    {
        // The convolution needs the offsets with a negative sign, so temporarily change the QuantizationInfo.
        // Extract and negate the input and weights offsets
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
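
        // Illustrative note (not part of the original source): a quantized value x_q with
        // zero-point o represents the real value x = scale * (x_q - o). The GEMMLowp core
        // consumes the stored zero-points with the opposite sign convention, so they are
        // negated here for the configuration call and restored immediately afterwards, leaving
        // the original QuantizationInfo intact for any other layer that shares these tensors.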

        _mm_gemmlowp.configure(compile_context, input, weights, biases, output, gemm_info);

        // Revert the QuantizationInfo, as input and weights could be used in other convolution layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm.configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                           const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(is_quantized)
    {
        // As in configure_mm(), GEMMLowp needs the negated input and weights offsets.
        // Work on clones so the caller's TensorInfo objects are not modified during validation
        const QuantizationInfo input_quantization_info   = input->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        std::unique_ptr<ITensorInfo> input_qa   = input->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Perform validation step on GEMMLowp
        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
    }
    else
    {
        // Perform validation step on the matrix multiply function
        return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                       const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
}

void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       const PadStrideInfo &conv_info,
                                       const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(input->info(),
                                                                weights->info(),
                                                                biases != nullptr ? biases->info() : nullptr,
                                                                output->info(),
                                                                conv_info,
                                                                weights_info,
                                                                dilation,
                                                                act_info,
                                                                num_groups));

    const DataType   data_type   = input->info()->data_type();
    const DataLayout data_layout = input->info()->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->info()->dimension(idx_width);
    const unsigned int kernel_height = weights->info()->dimension(idx_height);
    const unsigned int num_kernels   = weights->info()->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

    _is_prepared      = weights_info.retain_internal_weights();
    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    // im2col can be skipped for 1x1 stride-1 NHWC convolutions, where the input is already a valid GEMM operand;
    // col2im is only needed for NCHW, since the NHWC GEMM output can be reinterpreted in place (see GEMM3D below)
    _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    _skip_col2im      = data_layout == DataLayout::NHWC;

    // Only for quantized types are there a few cases where the activation function cannot be fused in GEMM
    _fuse_activation = true;

    // Set the GPU target for im2col and col2im
    _im2col_kernel->set_target(CLScheduler::get().target());
    _col2im_kernel->set_target(CLScheduler::get().target());

    const ICLTensor *gemm_input_to_use  = input;
    ICLTensor       *gemm_output_to_use = output;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
                                                 input->info()->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);
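
    // Worked example (illustrative values, not from the source): a 224x224 input with a 3x3
    // kernel, stride 2, padding 1 and no dilation gives, assuming floor rounding,
    // conv_w = conv_h = (224 + 2*1 - 3) / 2 + 1 = 112.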

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ICLTensor *biases_to_use = biases;
    bool             append_bias   = false;

    ICLTensor *weights_to_use = &_weights_reshaped;
    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used for NCHW
        // Since a utility function to reshape the biases is not available, the biases are appended to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, biases, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
        }
    }
    else
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, nullptr, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, nullptr, &_weights_reshaped, num_groups);
        }
    }

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        _memory_group.manage(&_im2col_output);

        // Configure and tune im2col. The im2col output shape is auto-initialized
        _im2col_kernel->configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);

        // Set quantization info
        _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
        CLScheduler::get().tune_kernel_static(*_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If col2im cannot be skipped, im2col has been run as well
        shape_gemm = _im2col_output.info()->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        TensorInfo info_gemm(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
        _gemm_output.allocator()->init(info_gemm);
        _memory_group.manage(&_gemm_output);

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }
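
    // At this point the GEMM operands are fully determined: the (possibly im2col-ed) input acts
    // as an M x K matrix with M = conv_w * conv_h output locations and K roughly equal to
    // kernel_width * kernel_height * channels, the reshaped weights act as a K x N matrix with
    // N = num_kernels, and the GEMM output is laid out with dimension 0 = N = mat_weights_cols
    // and dimension 1 = M = conv_w * conv_h, as set above.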

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (output->info()->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input->info(),
                                                               weights->info(),
                                                               output->info(),
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        PixelValue min_val{};
        PixelValue max_val{};
        std::tie(min_val, max_val) = get_min_max(output->info()->data_type());

        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }
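
    // Illustrative note (not from the original source): with QUANTIZE_DOWN_FIXEDPOINT the int32
    // accumulator is requantized roughly as
    //   q_out = clamp(round(acc * multiplier / 2^shift) + gemmlowp_offset, min_bound, max_bound)
    // with one multiplier/shift pair per filter when per-channel quantization is used, so a
    // ReLU-family activation costs nothing extra: it only tightens the clamping bounds above.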

    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);

    if(!_skip_im2col)
    {
        _im2col_output.allocator()->allocate();
    }

    if(!_skip_col2im)
    {
        // Configure and tune Col2Im
        _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
        CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
    }

    if(!_skip_col2im)
    {
        _gemm_output.allocator()->allocate();
    }

    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    if(!_fuse_activation)
    {
        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
    }

    ARM_COMPUTE_UNUSED(weights_info);
}
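
// Usage sketch (illustrative, not part of the library source): running a 3x3 FP32 convolution
// through this function. All shapes and values below are assumptions for the example.
//
//   CLScheduler::get().default_init();
//
//   CLTensor src, wei, bia, dst;
//   src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));    // W x H x C (NCHW)
//   wei.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));  // 8 kernels of 3x3x16
//   bia.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
//   dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
//
//   CLGEMMConvolutionLayer conv;
//   conv.configure(&src, &wei, &bia, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1
//
//   src.allocator()->allocate();
//   wei.allocator()->allocate();
//   bia.allocator()->allocate();
//   dst.allocator()->allocate();
//
//   /* ...fill src, wei and bia... */
//   conv.run(); // the first run also reshapes the weights via prepare()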

Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                        const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(!is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = input->data_layout();
    const DataType   data_type   = input->data_type();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = input;
    const ITensorInfo *gemm_output_to_use = output;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(act_info.enabled())
    {
        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
                                                 input->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used for NCHW
        // Since a utility function to reshape the biases is not available, the biases are appended to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, biases, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, num_groups), 1, data_type);
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups);

        auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info           = input->quantization_info().uniform();
        const UniformQuantizationInfo oq_info           = output->quantization_info().uniform();
        const auto                    output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input,
                                                               weights,
                                                               output,
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
    }

    // Validate activation layer
    if(!fuse_activation)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}
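
// Usage sketch (illustrative, not part of the library source): validate() works on bare
// TensorInfo objects before any OpenCL memory is touched, so a configuration can be probed
// for support up front, e.g.:
//
//   const Status s = CLGEMMConvolutionLayer::validate(src.info(), wei.info(), bia.info(), dst.info(),
//                                                     PadStrideInfo(1, 1, 1, 1));
//   if(!bool(s))
//   {
//       std::cerr << s.error_description() << std::endl; // unsupported configuration
//   }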

void CLGEMMConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run im2col
    if(!_skip_im2col)
    {
        CLScheduler::get().enqueue(*_im2col_kernel);
    }

    // Run the CLGEMM or CLGEMMLowpMatrixMultiplyCore function
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp.run();
    }
    else
    {
        // Run gemm
        _mm_gemm.run();
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        CLScheduler::get().enqueue(*_col2im_kernel.get(), false);
    }

    // Run the activation layer if it could not be fused in GEMM
    if(!_fuse_activation)
    {
        _activationlayer_function.run();
    }
}

void CLGEMMConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
        {
            _weights_manager->run(_original_weights, &_reshape_weights_managed);
        }
        else
        {
            // Run weights reshaping and mark original weights tensor as unused
            _weights_reshaped.allocator()->allocate();
            _reshape_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
        if(!_weights_reshaped.is_used())
        {
            _weights_reshaped.allocator()->free();
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
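
// Note (illustrative, not from the original source): run() invokes prepare() on its first call,
// but prepare() can also be called explicitly after configure() and tensor allocation to move
// the one-off weights reshape (and the GEMM's own preparation) out of the first inference, e.g.:
//
//   conv.configure(&src, &wei, &bia, &dst, PadStrideInfo(1, 1, 1, 1));
//   /* ...allocate tensors and fill the weights... */
//   conv.prepare(); // one-off work happens here
//   conv.run();     // steady-state execution only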
} // namespace arm_compute