CLGEMMConvolutionLayer.cpp
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

#include <cmath>
#include <memory>
#include <set>
#include <tuple>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::cast;

CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
    : _weights_reshape_kernel()
{
}

void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
    // Perform validation step
    ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(),
                                                                          (biases != nullptr) ? biases->info() : nullptr,
                                                                          output->info(),
                                                                          num_groups));

    const bool       append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
    const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;

    _weights_reshape_kernel.configure(weights, biases_to_use, output, num_groups);

    output->info()->set_quantization_info(weights->info()->quantization_info());
}

Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        const int idx_kernels = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
        ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(weights->data_type()));

        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
        ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, output, num_groups));
    }

    return Status{};
}

void CLConvolutionLayerReshapeWeights::run()
{
    CLScheduler::get().enqueue(_weights_reshape_kernel);
}

CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager),
      _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false),
      _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}

void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                          int gemm_3d_depth, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(_is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);

        // Revert QuantizationInfo as input and weights could be used in other convolution layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm.configure(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}
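
// Note on the quantized path above: the convolution needs terms of the form
// (q_in - z_in) * (q_w - z_w) accumulated in int32, while the GEMMLowp core adds
// the offsets it is given to the raw 8-bit values before multiplying. Passing
// QuantizationInfo(scale, -offset) therefore turns those additions into the
// required subtractions. Worked example with hypothetical values: q_in = 132,
// z_in = 128, q_w = 120, z_w = 128 gives (132 - 128) * (120 - 128) = -32 for
// that accumulation term.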

Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                           const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info);             // activation_info

    if(is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        std::unique_ptr<ITensorInfo> input_qa   = input->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
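
        // Note: unlike configure_mm(), the negated offsets are applied to clones of
        // the tensor infos, so validation never mutates the caller's tensors.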

        // Perform validation step on GEMMLowp
        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
    }
    else
    {
        // Perform validation step on Matrix multiply function
        return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
    }
}

void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                       const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(input->info(),
                                                                weights->info(),
                                                                biases != nullptr ? biases->info() : nullptr,
                                                                output->info(),
                                                                conv_info,
                                                                weights_info,
                                                                dilation,
                                                                act_info,
                                                                num_groups));

    const DataType   data_type   = input->info()->data_type();
    const DataLayout data_layout = input->info()->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->info()->dimension(idx_width);
    const unsigned int kernel_height = weights->info()->dimension(idx_height);
    const unsigned int num_kernels   = weights->info()->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

    _is_prepared      = weights_info.retain_internal_weights();
    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _skip_im2col      = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    _skip_col2im      = data_layout == DataLayout::NHWC;
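
    // A 1x1 kernel at unit stride in NHWC reads, for each output point, one contiguous
    // run of input channels, so the input already has the im2col layout. In NHWC the
    // GEMM result likewise already matches the output layout (it is reinterpreted via
    // gemm_3d_depth below), which is why col2im can always be skipped.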

    // Only for quantized types are there a few cases where the activation cannot be fused in GEMM
    _fuse_activation = true;

    // Set the GPU target for im2col and col2im
    _im2col_kernel.set_target(CLScheduler::get().target());
    _col2im_kernel.set_target(CLScheduler::get().target());

    const ICLTensor *gemm_input_to_use  = input;
    ICLTensor       *gemm_output_to_use = output;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
                                                 input->info()->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);
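
    // scaled_dimensions() applies the usual convolution output-size formula,
    //   out = (in + pad_before + pad_after - dilation * (kernel - 1) - 1) / stride + 1,
    // rounded according to the mode carried by conv_info.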

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ICLTensor *biases_to_use = biases;
    bool             append_bias   = false;

    ICLTensor *weights_to_use = &_weights_reshaped;
    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(weights, biases, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(weights, biases, &_weights_reshaped, num_groups);
        }
    }
    else
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed.configure(weights, nullptr, num_groups);
            weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
        }
        else
        {
            _reshape_weights.configure(weights, nullptr, &_weights_reshaped, num_groups);
        }
    }
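
    // When an IWeightsManager owns the weights, the reshape is registered with the
    // manager and the transformed tensor is acquired from it, allowing several
    // functions to share one reshaped copy; otherwise the reshape writes into the
    // locally owned _weights_reshaped tensor.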

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        _memory_group.manage(&_im2col_output);

        // Configure and tune im2col. im2col output shape is auto-initialized
        _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);

        // Set quantization info
        _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
        CLScheduler::get().tune_kernel_static(_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }
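
    // im2col linearizes each kernel-sized input patch into one row of a 2D matrix
    // (appending a constant 1 per patch when append_bias is set, to match the bias
    // row added to the reshaped weights), so the convolution becomes a single GEMM.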

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If we cannot skip col2im it means we run im2col as well
        shape_gemm = _im2col_output.info()->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        // TODO(COMPMID-2078): input->clone() doesn't work with subtensors for grouped convolutions.
        TensorInfo info_gemm(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
        _gemm_output.allocator()->init(info_gemm);
        _memory_group.manage(&_gemm_output);

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (output->info()->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input->info(),
                                                               weights->info(),
                                                               output->info(),
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

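    // Requantization sketch: the int32 accumulator is scaled back to 8 bit with
    // real_multiplier = (input_scale * weights_scale) / output_scale, which
    // compute_quantized_multipliers_and_shifts() expresses as a fixed-point
    // multiplier plus shift. Per-channel quantized weights get one pair per
    // filter; otherwise only the pair at index 0 is used.
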
    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    configure_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);

    if(!_skip_im2col)
    {
        _im2col_output.allocator()->allocate();
    }

    if(!_skip_col2im)
    {
        // Configure and tune Col2Im
        _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
        CLScheduler::get().tune_kernel_static(_col2im_kernel);

        _gemm_output.allocator()->allocate();
    }

    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    if(!_fuse_activation)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }

    ARM_COMPUTE_UNUSED(weights_info);
}

Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                        const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() != DataType::QASYMM8, "Input data type not compatible with Weights");
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = input->data_layout();
    const DataType   data_type   = input->data_type();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = input;
    const ITensorInfo *gemm_output_to_use = output;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
                                                 input->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv_info,
                                                 dilation);

    unsigned int mat_weights_cols = num_kernels / num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be for NCHW
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use = nullptr;
        append_bias   = true;

        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, biases, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, num_groups), 1, data_type);
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr, num_groups));
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups);

        auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info           = input->quantization_info().uniform();
        const UniformQuantizationInfo oq_info           = output->quantization_info().uniform();
        const auto                    output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(input,
                                                               weights,
                                                               output,
                                                               idx_kernels,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(act_info.enabled())
        {
            if(supported_acts.count(act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
    }

    // Validate Activation Layer
    if(!fuse_activation)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void CLGEMMConvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run im2col
    if(!_skip_im2col)
    {
        CLScheduler::get().enqueue(_im2col_kernel);
    }

    // Run CLGEMM or CLGEMMLowpMatrixMultiplyCore
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp.run();
    }
    else
    {
        // Run gemm
        _mm_gemm.run();
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        CLScheduler::get().enqueue(_col2im_kernel, false);
    }

    // Run Activation Layer if we cannot fuse in GEMM
    if(!_fuse_activation)
    {
        _activationlayer_function.run();
    }
}

void CLGEMMConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
        {
            _weights_manager->run(_original_weights, &_reshape_weights_managed);
        }
        else
        {
            // Run weights reshaping and mark original weights tensor as unused
            _weights_reshaped.allocator()->allocate();
            _reshape_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
        if(!_weights_reshaped.is_used())
        {
            _weights_reshaped.allocator()->free();
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute
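
For reference, a minimal usage sketch (not part of the file above). The shapes, data
types, and padding are illustrative assumptions; the init/configure/allocate/run
pattern follows the library's CL examples:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // NCHW FP32: 224x224 RGB input, 16 kernels of size 3x3, same padding, unit stride
    CLTensor input, weights, biases, output;
    input.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(224U, 224U, 16U, 1U), 1, DataType::F32));

    CLGEMMConvolutionLayer conv;
    conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1));

    input.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    output.allocator()->allocate();

    // Fill input/weights/biases here (e.g. via map()/unmap()), then:
    conv.run();
    CLScheduler::get().sync();
    return 0;
}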