Compute Library 20.05
CLGEMMConvolutionLayer.cpp
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

#include <cmath>
#include <memory>
#include <set>
#include <tuple>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;

CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
    : _weights_reshape_kernel()
{
}

void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
}

void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    // Perform validation step
    ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(),
                                                                          (biases != nullptr) ? biases->info() : nullptr,
                                                                          output->info(),
                                                                          num_groups));

    const bool       append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
    const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;

    _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups);
}

Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        const int idx_kernels = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
        ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(weights->data_type()));

        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
        ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, output, num_groups));
    }

    return Status{};
}

void CLConvolutionLayerReshapeWeights::run()
{
    CLScheduler::get().enqueue(_weights_reshape_kernel);
}
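
// Usage sketch (illustrative addition, not part of the upstream file): how a caller
// might exercise CLConvolutionLayerReshapeWeights directly. All shapes below are
// assumptions; compute_weights_reshaped_shape() derives the destination shape.
//
//   CLTensor weights{}, reshaped{};
//   // 3x3 kernels, 16 input channels, 8 output kernels (assumed shapes)
//   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));
//   reshaped.allocator()->init(TensorInfo(compute_weights_reshaped_shape(*weights.info()), 1, DataType::F32));
//
//   CLConvolutionLayerReshapeWeights reshape{};
//   ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights.info(), nullptr, reshaped.info()));
//   reshape.configure(&weights, nullptr, &reshaped);
//
//   weights.allocator()->allocate();
//   reshaped.allocator()->allocate();
//   // ... fill weights, then:
//   reshape.run();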

CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager),
      _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false),
      _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}

void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                          const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                          int gemm_3d_depth, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(_is_quantized)
    {
        // Since computing the convolution needs negated offsets, temporarily change the QuantizationInfo:
        // extract and negate the input and weights offsets
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        _mm_gemmlowp.configure(compile_context, input, weights, biases, output, gemm_info);

        // Restore the original QuantizationInfo, as input and weights could be used in other convolution layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm.configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                           const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(is_quantized)
    {
        // Since computing the convolution needs negated offsets, negate the input and
        // weights offsets on cloned tensor infos
        const QuantizationInfo input_quantization_info   = input->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        std::unique_ptr<ITensorInfo> input_qa   = input->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Perform validation step on GEMMLowp
        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
    }
    else
    {
        // Perform validation step on Matrix multiply function
        return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}
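
// Note (explanatory addition): why the offsets are negated above. A quantized
// convolution accumulates
//   acc = sum_k (a_q[k] - a_offset) * (b_q[k] - b_offset)
// whereas the GEMMLowp core's offset-contribution step adds the offset terms it
// reads from each tensor's QuantizationInfo. Passing the same scale with -offset
// therefore reproduces the subtraction the convolution requires; the original
// QuantizationInfo is restored afterwards since the tensors may be shared with
// other layers.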

void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                       const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
}

void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       const PadStrideInfo &conv_info,
                                       const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(input->info(),
                                                                weights->info(),
                                                                biases != nullptr ? biases->info() : nullptr,
                                                                output->info(),
                                                                conv_info,
                                                                weights_info,
                                                                dilation,
                                                                act_info,
                                                                num_groups));

    const DataType   data_type   = input->info()->data_type();
    const DataLayout data_layout = input->info()->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->info()->dimension(idx_width);
    const unsigned int kernel_height = weights->info()->dimension(idx_height);
    const unsigned int num_kernels   = weights->info()->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

    _is_prepared      = weights_info.retain_internal_weights();
    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    _skip_col2im      = data_layout == DataLayout::NHWC;

    // Only in the quantized case are there a few activation functions that cannot be fused in GEMM
    _fuse_activation = true;

    // Set the GPU target for im2col and col2im
    _im2col_kernel.set_target(CLScheduler::get().target());
    _col2im_kernel.set_target(CLScheduler::get().target());

    const ICLTensor *gemm_input_to_use  = input;
    ICLTensor       *gemm_output_to_use = output;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
                                                 input->info()->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);
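
    // For reference (illustrative numbers, assuming the default FLOOR rounding):
    //   conv_w = (in_w + pad_left + pad_right - dilation_x * (kernel_w - 1) - 1) / stride_x + 1
    // e.g. a 224x224 input with a 3x3 kernel, stride 2, padding 1 and dilation 1
    // gives a 112x112 convolved output.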

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ICLTensor *biases_to_use = biases;
    bool             append_bias   = false;

    ICLTensor *weights_to_use = &_weights_reshaped;
    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used with NCHW
        // Since a utility function to reshape the biases is not available, the biases are appended to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, biases, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
        }
    }
    else
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(compile_context, weights, nullptr, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(compile_context, weights, nullptr, &_weights_reshaped, num_groups);
        }
    }

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        _memory_group.manage(&_im2col_output);

        // Configure and tune im2col. im2col output shape is auto-initialized
        _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);

        // Set quantization info
        _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
        CLScheduler::get().tune_kernel_static(_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If col2im cannot be skipped, im2col is run as well
        shape_gemm = _im2col_output.info()->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        // TODO(COMPMID-2078): input->clone() doesn't work with subtensors for grouped convolutions.
        TensorInfo info_gemm(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
        _gemm_output.allocator()->init(info_gemm);
        _memory_group.manage(&_gemm_output);

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (output->info()->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input->info(),
                                                               weights->info(),
                                                               output->info(),
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        PixelValue min_val{};
        PixelValue max_val{};
        std::tie(min_val, max_val) = get_min_max(output->info()->data_type());

        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
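    // Explanatory note: a non-zero gemm_3d_depth makes the GEMM write its destination
    // as a 3D tensor of conv_h slices, which already matches the NHWC output layout,
    // so no separate col2im reshape is needed.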

    configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);

    if(!_skip_im2col)
    {
        _im2col_output.allocator()->allocate();
    }

    if(!_skip_col2im)
    {
        // Configure and tune Col2Im
        _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
        CLScheduler::get().tune_kernel_static(_col2im_kernel);
    }

    if(!_skip_col2im)
    {
        _gemm_output.allocator()->allocate();
    }

    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    if(!_fuse_activation)
    {
        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
    }
}
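
// Usage sketch (illustrative addition, not part of the upstream file): configuring and
// running a 3x3, stride-1, pad-1 F32 convolution in NCHW. All shapes are assumptions.
//
//   CLTensor src{}, weights{}, biases{}, dst{};
//   src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
//   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));
//   biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
//   dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
//
//   CLGEMMConvolutionLayer conv{};
//   conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
//
//   src.allocator()->allocate();
//   weights.allocator()->allocate();
//   biases.allocator()->allocate();
//   dst.allocator()->allocate();
//   // ... fill src, weights and biases, then:
//   conv.run();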

Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                        const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() != DataType::QASYMM8, "Input data type not compatible with Weights");
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = input->data_layout();
    const DataType   data_type   = input->data_type();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = input;
    const ITensorInfo *gemm_output_to_use = output;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(act_info.enabled())
    {
        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
                                                 input->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used with NCHW
        // Since a utility function to reshape the biases is not available, the biases are appended to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, biases, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, num_groups), 1, data_type);
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups);

        auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info           = input->quantization_info().uniform();
        const UniformQuantizationInfo oq_info           = output->quantization_info().uniform();
        const auto                    output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input,
                                                               weights,
                                                               output,
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
    }

    // Validate Activation Layer
    if(!fuse_activation)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}
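
// Validation sketch (illustrative addition): validate() mirrors configure() and can be
// queried before allocating anything, e.g. to choose between convolution methods.
// The tensor-info names below are assumptions.
//
//   const Status status = CLGEMMConvolutionLayer::validate(src_info, wei_info,
//                                                          bias_info, dst_info,
//                                                          PadStrideInfo(1, 1, 1, 1));
//   if(!bool(status))
//   {
//       // fall back to a different convolution implementation
//   }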

void CLGEMMConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run im2col
    if(!_skip_im2col)
    {
        CLScheduler::get().enqueue(_im2col_kernel);
    }

    // Run CLGEMM or CLGEMMLowpMatrixMultiplyCore
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp.run();
    }
    else
    {
        // Run gemm
        _mm_gemm.run();
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        CLScheduler::get().enqueue(_col2im_kernel, false);
    }

    // Run the activation layer if it could not be fused in GEMM
    if(!_fuse_activation)
    {
        _activationlayer_function.run();
    }
}

void CLGEMMConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
        {
            _weights_manager->run(_original_weights, &_reshape_weights_managed);
        }
        else
        {
            // Run weights reshaping and mark original weights tensor as unused
            _weights_reshaped.allocator()->allocate();
            _reshape_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
        if(!_weights_reshaped.is_used())
        {
            _weights_reshaped.allocator()->free();
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute