Compute Library 20.08 - CLGEMMConvolutionLayer.cpp
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/Cast.h"

#include <cmath>
#include <memory>
#include <set>
#include <tuple>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;

CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
    : _weights_reshape_kernel()
{
}

void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
}

void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    // Perform validation step
    ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(),
                                                                          (biases != nullptr) ? biases->info() : nullptr,
                                                                          output->info(),
                                                                          num_groups));

    const bool       append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
    const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;

    _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups);

    output->info()->set_quantization_info(weights->info()->quantization_info());
}

Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        const int idx_kernels = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
        ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(weights->data_type()));

        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
        ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, output, num_groups));
    }

    return Status{};
}

void CLConvolutionLayerReshapeWeights::run()
{
    CLScheduler::get().enqueue(_weights_reshape_kernel);
}

CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager),
      _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false),
      _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}

void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                          const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                          int gemm_3d_depth, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(_is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
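        // GEMMLowp follows the convention result = sum((lhs + lhs_offset) * (rhs + rhs_offset)),
        // while dequantization uses real = scale * (q - offset); handing the offsets over negated
        // therefore makes the core accumulate (q_in - z_in) * (q_w - z_w), as convolution requires.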
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        _mm_gemmlowp.configure(compile_context, input, weights, biases, output, gemm_info);

        // Revert back QuantizationInfo as input and weights could be used in other convolution layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm.configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                           const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

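        // validate() must not mutate its arguments, so the negated offsets are applied to clones
        // of the tensor infos rather than to the caller's infos (unlike configure_mm(), which
        // temporarily modifies and then restores them).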
        std::unique_ptr<ITensorInfo> input_qa   = input->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Perform validation step on GEMMLowp
        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
    }
    else
    {
        // Perform validation step on Matrix multiply function
        return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                       const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
}

void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       const PadStrideInfo &conv_info,
                                       const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(input->info(),
                                                                weights->info(),
                                                                biases != nullptr ? biases->info() : nullptr,
                                                                output->info(),
                                                                conv_info,
                                                                weights_info,
                                                                dilation,
                                                                act_info,
                                                                num_groups));

    const DataType   data_type   = input->info()->data_type();
    const DataLayout data_layout = input->info()->data_layout();

    const int idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->info()->dimension(idx_width);
    const unsigned int kernel_height = weights->info()->dimension(idx_height);
    const unsigned int num_kernels   = weights->info()->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

    _is_prepared      = weights_info.retain_internal_weights();
    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    _skip_col2im      = data_layout == DataLayout::NHWC;
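    // For a 1x1 stride-1 kernel in NHWC each output position reads exactly one contiguous channel
    // vector, so the input is already laid out as the GEMM expects and im2col adds nothing.
    // In NHWC the GEMM result can also be written out directly (see gemm_3d_depth below), so
    // col2im is always skipped for that layout.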

    // Only for quantized types are there a few cases where the activation function cannot be fused in GEMM
    _fuse_activation = true;

    // Set the GPU target for im2col and col2im
    _im2col_kernel.set_target(CLScheduler::get().target());
    _col2im_kernel.set_target(CLScheduler::get().target());

    const ICLTensor *gemm_input_to_use  = input;
    ICLTensor       *gemm_output_to_use = output;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
                                                 input->info()->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ICLTensor *biases_to_use = biases;
    bool             append_bias   = false;

    ICLTensor *weights_to_use = &_weights_reshaped;
    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since there is no utility function to reshape the biases, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, biases, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
        }
    }
    else
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, nullptr, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, nullptr, &_weights_reshaped, num_groups);
        }
    }

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        _memory_group.manage(&_im2col_output);

        // Configure and tune im2col. im2col output shape is auto-initialized
        _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);

        // Set quantization info
        _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
        CLScheduler::get().tune_kernel_static(_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If we cannot skip col2im it means we run im2col as well
        shape_gemm = _im2col_output.info()->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);
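        // The GEMM output is therefore a 2D matrix with one column per filter and one row per
        // output spatial position; col2im later folds it back into a (W, H, C) tensor.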

        TensorInfo info_gemm(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
        _gemm_output.allocator()->init(info_gemm);
        _memory_group.manage(&_gemm_output);

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (output->info()->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input->info(),
                                                               weights->info(),
                                                               output->info(),
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        PixelValue min_val{};
        PixelValue max_val{};
        std::tie(min_val, max_val) = get_min_max(output->info()->data_type());

        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
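    // With depth_output_gemm3d == conv_h the GEMM writes its result directly as a 3D tensor in the
    // (C, W, H) dimension order of the NHWC output, so no separate col2im pass is required.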

    configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);

    if(!_skip_im2col)
    {
        _im2col_output.allocator()->allocate();
    }

    if(!_skip_col2im)
    {
        // Configure and tune Col2Im
        _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
        CLScheduler::get().tune_kernel_static(_col2im_kernel);
    }

    if(!_skip_col2im)
    {
        _gemm_output.allocator()->allocate();
    }

    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    if(!_fuse_activation)
    {
        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
    }

    ARM_COMPUTE_UNUSED(weights_info);
}

Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                        const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(!is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = input->data_layout();
    const DataType   data_type   = input->data_type();

    const int idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = input;
    const ITensorInfo *gemm_output_to_use = output;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(act_info.enabled())
    {
        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
                                                 input->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since there is no utility function to reshape the biases, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, biases, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, num_groups), 1, data_type);
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups);

        auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info           = input->quantization_info().uniform();
        const UniformQuantizationInfo oq_info           = output->quantization_info().uniform();
        const auto                    output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input,
                                                               weights,
                                                               output,
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
    }

    // Validate Activation Layer
    if(!fuse_activation)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void CLGEMMConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run im2col
    if(!_skip_im2col)
    {
        CLScheduler::get().enqueue(_im2col_kernel);
    }

    // Run the CLGEMM or CLGEMMLowpMatrixMultiplyCore function
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp.run();
    }
    else
    {
        // Run gemm
        _mm_gemm.run();
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        CLScheduler::get().enqueue(_col2im_kernel, false);
    }

    // Run the activation layer if it could not be fused in GEMM
    if(!_fuse_activation)
    {
        _activationlayer_function.run();
    }
}

void CLGEMMConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
        {
            _weights_manager->run(_original_weights, &_reshape_weights_managed);
        }
        else
        {
            // Run weights reshaping and mark original weights tensor as unused
            _weights_reshaped.allocator()->allocate();
            _reshape_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
        if(!_weights_reshaped.is_used())
        {
            _weights_reshaped.allocator()->free();
        }
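        // After prepare(), the GEMM holds its own transformed copy of the weights
        // (reshape_b_only_on_first_run), so the intermediate reshaped tensor can be
        // released as soon as nothing else references it.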

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute
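
The following standalone sketch illustrates how the function above is typically driven (configure, allocate, run). It is not part of the library source; the tensor shapes, data type and padding chosen here are assumptions made purely for the example.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Create an OpenCL context/queue for the CLScheduler singleton
    CLScheduler::get().default_init();

    // NCHW FP32 example: 1 batch of 16 channels at 32x32, convolved with 8 kernels of 3x3
    CLTensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U, 1U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U, 1U), 1, DataType::F32));

    // Stride 1 with padding 1 keeps the 32x32 spatial size
    CLGEMMConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

    // Allocate the backing OpenCL buffers after configuration
    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases here, e.g. via map()/unmap() ...

    conv.run();                // enqueue im2col/GEMM/col2im as configured above
    CLScheduler::get().sync(); // wait for the OpenCL queue to drain
    return 0;
}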