Compute Library
 21.02
NEGEMMConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
27 #include "arm_compute/core/Utils.h"
32 
46 
47 #include <set>
48 #include <tuple>
49 
50 namespace arm_compute
51 {
53 
// NOTE(review): the constructor signature (original lines 54-55,
// NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()) is missing
// from this extraction — confirm against the upstream file. The initializer list
// below default-constructs the (empty) reshape-kernel pointer.
56  : _weights_reshape_kernel()
57 {
58 }
59 
60 void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output)
61 {
62  // Perform validation step
63  ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
// NOTE(review): original line 64 — presumably the start of
// ARM_COMPUTE_ERROR_THROW_ON(validate(weights->info(), ...) whose trailing
// arguments are the two lines below — is missing from this listing; verify upstream.
65  (biases != nullptr) ? biases->info() : nullptr,
66  output->info()));
// Biases are appended to the reshaped weights only for non-quantized types;
// quantized convolutions handle biases later (in the GEMMLowp output stage).
67  const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
68  const ITensor *biases_to_use = (append_biases) ? biases : nullptr;
69 
70  _weights_reshape_kernel = std::make_unique<NEWeightsReshapeKernel>();
71  _weights_reshape_kernel->configure(weights, biases_to_use, output);
72 
// Propagate the weights' quantization info onto the reshaped output tensor.
73  output->info()->set_quantization_info(weights->info()->quantization_info());
74 }
75 
// NOTE(review): the signature (original line 76, presumably
// Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights,
// const ITensorInfo *biases, const ITensorInfo *output)) and several validation
// macro lines (78-82, 86-88, 90, 95) are missing from this extraction.
77 {
83 
84  if(biases != nullptr)
85  {
// The bias vector must have one element per output kernel (batch dimension of weights).
89  ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
91  }
92 
93  if((output != nullptr) && (output->total_size() != 0))
94  {
96 
// NOTE(review): the Status returned by NEWeightsReshapeKernel::validate is
// discarded here — it likely should be wrapped in ARM_COMPUTE_RETURN_ON_ERROR;
// confirm against the upstream source before changing.
97  NEWeightsReshapeKernel::validate(weights, biases, output);
98  }
99 
100  return Status{};
101 }
102 
// NOTE(review): the signature (original line 103, presumably
// void NEConvolutionLayerReshapeWeights::run()) is missing from this extraction.
104 {
// Schedule the reshape kernel; the literal 3 is the split-dimension hint
// (dimension index 3) passed to the scheduler.
105  NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3);
106 }
107 
109 
// Constructor: wires every sub-function/kernel to the shared memory manager and the
// (optional) weights manager. All state flags start in their "not yet configured"
// defaults (NCHW layout, nothing skipped, not quantized, not prepared).
110 NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
111  : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
112  _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(),
113  _data_layout(DataLayout::NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false)
114 {
115 }
116 
// Configure the matrix-multiply stage: dispatches to NEGEMMLowpMatrixMultiplyCore for
// asymmetric-quantized data (with activation merged into the output stage) or to
// NEGEMM otherwise. gemm_3d_depth != 0 requests GEMM3D output so col2im can be skipped.
117 void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info, int gemm_3d_depth)
118 {
119  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
120  ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output == nullptr ? nullptr : output->info(),
121  act_info, gemm_3d_depth, _skip_im2col));
122 
123  // Create GEMMInfo structure
124  const GEMMInfo &gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
125  gemm_3d_depth, _skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
126  false, GEMMLowpOutputStageInfo(), false, false, act_info);
127 
128  // Supported activations in GEMM
129  const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
// NOTE(review): original lines 130-131 (presumably the BOUNDED_RELU and
// LU_BOUNDED_RELU enumerators of this set) are missing from this extraction.
132  };
133 
134  if(_is_quantized)
135  {
136  // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
137  // Extract and negate input and weights offset
138  const QuantizationInfo iqinfo = input->info()->quantization_info();
139  const QuantizationInfo wqinfo = weights->info()->quantization_info();
140  const QuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info();
141  const UniformQuantizationInfo uiqinfo = iqinfo.uniform();
142  const UniformQuantizationInfo uoqinfo = oqinfo.uniform();
143  const DataType data_type = input->info()->data_type();
144 
145  input->info()->set_quantization_info(QuantizationInfo(uiqinfo.scale, -uiqinfo.offset));
// NOTE(review): original line 146 (the condition guarding this block — likely a
// per-channel-quantization check on the weights) is missing from this extraction.
147  {
148  const UniformQuantizationInfo uwqinfo = wqinfo.uniform();
149  weights->info()->set_quantization_info(QuantizationInfo(uwqinfo.scale, -uwqinfo.offset));
150  }
151 
152  // Merge activation with output stage
// NOTE(review): original lines 153-154 (presumably the PixelValue type_min/type_max
// declarations consumed by the std::tie below) are missing from this extraction.
155  std::tie(type_min, type_max) = get_min_max(data_type);
156  int32_t min_activation = type_min.get<int32_t>();
157  int32_t max_activation = type_max.get<int32_t>();
158 
159  if(supported_acts.count(act_info.activation()) != 0)
160  {
160  // Narrow the clamp range to the quantized activation bounds when supported.
161  std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, uoqinfo);
162  }
163 
// NOTE(review): original lines 164-165 (presumably the GEMMLowpOutputStageInfo
// output_info declaration and its stage-type assignment) are missing here.
166  output_info.gemmlowp_offset = uoqinfo.offset;
167  output_info.gemmlowp_min_bound = min_activation;
168  output_info.gemmlowp_max_bound = max_activation;
169  output_info.is_quantized_per_channel = (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL);
170  quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
171 
172  _mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info, false, false, act_info));
173 
174  // Revert back QuantizationInfo as input and weights could be used in other convolution layers
175  input->info()->set_quantization_info(iqinfo);
176  weights->info()->set_quantization_info(wqinfo);
177  }
178  else
179  {
180  // Configure matrix multiply function
181  _mm_gemm.configure(input, weights, biases, output, 1.0f, 0.0f, gemm_info);
182  }
183 }
184 
// Static validation mirror of configure_mm(): checks whether the GEMM / GEMMLowp stage
// can be configured for the given tensor infos without modifying any tensor state
// (quantization negation is applied to clones, not the originals).
185 Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
186  const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col)
187 {
188  const DataType data_type = input->data_type();
189  const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
190  const bool is_activation_enabled = act_info.enabled();
191 
192  // Create GEMMInfo structure
193  const GEMMInfo gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
194  gemm_3d_depth, skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
195  false, GEMMLowpOutputStageInfo(), false, false, act_info);
196 
197  if(is_quantized)
198  {
199  // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
200  // Extract and negate input and weights offset
201  const QuantizationInfo &iqinfo = input->quantization_info();
202  const QuantizationInfo &wqinfo = weights->quantization_info();
203  const QuantizationInfo &oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info();
204  const UniformQuantizationInfo uoqinfo = oqinfo.uniform();
205 
206  // Merge activation with output stage
// NOTE(review): original lines 207-208 (presumably PixelValue type_min/type_max
// declarations consumed by the std::tie below) are missing from this extraction.
209  std::tie(type_min, type_max) = get_min_max(data_type);
210  int32_t min_activation = type_min.get<int32_t>();
211  int32_t max_activation = type_max.get<int32_t>();
212 
213  const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
// NOTE(review): original lines 214-215 (presumably BOUNDED_RELU and LU_BOUNDED_RELU
// enumerators of this set) are missing from this extraction.
216  };
217  if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0)
218  {
219  std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, uoqinfo);
220  }
221 
// NOTE(review): original lines 222-223 (presumably the GEMMLowpOutputStageInfo
// output_info declaration and its stage-type assignment) are missing here.
224  output_info.gemmlowp_offset = uoqinfo.offset;
225  output_info.gemmlowp_min_bound = min_activation;
226  output_info.gemmlowp_max_bound = max_activation;
// NOTE(review): original lines 227-228 (presumably is_quantized_per_channel and the
// calculate_quantized_multipliers call, as in configure_mm) are missing here.
229 
230  // Perform validation step on GEMMLowp
231  std::unique_ptr<ITensorInfo> input_qa = input->clone();
232  std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
233  input_qa->set_quantization_info(QuantizationInfo(iqinfo.uniform().scale, -iqinfo.uniform().offset));
234  weights_qa->set_quantization_info(QuantizationInfo(wqinfo.uniform().scale, -wqinfo.uniform().offset));
235  return NEGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, GEMMInfo(false, false, true, gemm_3d_depth, skip_im2col, false, output_info, false, false, act_info));
236  }
237  else
238  {
239  // Perform validation step on Matrix multiply function
240  return NEGEMM::validate(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
241  }
242 }
243 
244 Status NEGEMMConvolutionLayer::validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col)
245 {
246  const DataType data_type = input_info->data_type();
247  const unsigned int mult_y = skip_im2col ? 1U : gemm_3d_depth;
248  const unsigned int mult_z = skip_im2col ? gemm_3d_depth : 1U;
249 
250  // Set dummy tensor shapes for the validation
251  const TensorInfo dummy_input_info(TensorShape(4U, 4U * mult_y, 1U * mult_z), 1, data_type, input_info->quantization_info());
252  const TensorInfo dummy_weights_info(TensorShape(4U, 4U), 1, data_type, weights_info->quantization_info());
253  const TensorInfo dummy_output_info(TensorShape(4U, 4U, gemm_3d_depth), 1, data_type, input_info->quantization_info());
254 
255  return validate_mm(&dummy_input_info, &dummy_weights_info, nullptr, &dummy_output_info, act_info, gemm_3d_depth, skip_im2col);
256 }
257 
// Configure the full GEMM-based convolution pipeline:
// (optional) im2col -> weights reshape -> GEMM / GEMMLowp -> (optional) col2im or reshape.
// im2col is skipped for NHWC 1x1 stride-1 convolutions; col2im is skipped when GEMM3D
// output is supported (NHWC only). num_groups and weights_info are unused on Neon.
258 void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
259  const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
260 {
261  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
262  ARM_COMPUTE_UNUSED(num_groups, weights_info);
// NOTE(review): original line 263 — presumably the start of
// ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), ...) whose remaining arguments
// follow on the next lines — is missing from this extraction.
264  weights->info(),
265  biases != nullptr ? biases->info() : nullptr,
266  output->info(),
267  conv_info,
268  weights_info,
269  dilation,
270  act_info,
271  num_groups));
272 
273  const DataType data_type = input->info()->data_type();
274  const DataLayout data_layout = input->info()->data_layout();
// NOTE(review): original lines 275-276 (presumably the idx_width/idx_height
// get_data_layout_dimension_index declarations used below) are missing here.
277  const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
278 
279  const unsigned int kernel_width = weights->info()->dimension(idx_width);
280  const unsigned int kernel_height = weights->info()->dimension(idx_height);
281 
282  _is_prepared = weights_info.retain_internal_weights();
283  _original_weights = weights;
284  _original_output = output;
285  _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
286  _data_layout = data_layout;
// im2col can be skipped only for NHWC 1x1 kernels with unit stride.
287  _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
288 
289  const ITensor *gemm_input_to_use = input;
290  ITensor *gemm_output_to_use = output;
291 
292  // Get convolved dimensions
293  unsigned int conv_w = 0;
294  unsigned int conv_h = 0;
295  std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(idx_width),
296  input->info()->dimension(idx_height),
297  kernel_width,
298  kernel_height,
299  conv_info,
300  dilation);
301 
302  // Check if GEMM3D is supported
303  if(data_layout == DataLayout::NHWC)
304  {
305  _skip_col2im = bool(validate_gemm3d(input->info(), weights->info(), act_info, conv_h, true));
306  // If not supported, we need to perform im2col and col2im (or reshape layer)
307  if(!_skip_col2im)
308  {
309  _skip_im2col = false;
310  }
311  }
312  else
313  {
314  _skip_col2im = false;
315  }
316 
317  // Get parameters from conv_info
318  unsigned int stride_x = 0;
319  unsigned int stride_y = 0;
320  std::tie(stride_x, stride_y) = conv_info.stride();
321 
322  unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels);
323 
324  // _weights_reshaped will be auto configured in the kernel.
325  // Just append biases and do not transpose 1xW as it will be reshaped in NEGEMM
326  const ITensor *weights_to_use = weights;
327 
328  if(_weights_manager && _weights_manager->are_weights_managed(weights))
329  {
// Let the weights manager own the reshaped weights so they can be shared.
330  _reshape_weights_managed.configure(weights, nullptr);
331  weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed);
332  }
333  else
334  {
335  _reshape_weights.configure(weights, nullptr, &_weights_reshaped);
336  weights_to_use = &_weights_reshaped;
337  }
338 
339  // Create tensor to store im2col reshaped inputs
340  if(!_skip_im2col)
341  {
342  _memory_group.manage(&_im2col_output);
343 
344  // Configure
345  _im2col_kernel = std::make_unique<NEIm2ColKernel>();
346  _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
347 
348  // Update GEMM input
349  gemm_input_to_use = &_im2col_output;
350  }
351 
352  // Create temporary GEMM output tensor in case we cannot skip col2im
// BFLOAT16 inputs accumulate into F32 GEMM output.
353  const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
354  if(!_skip_col2im)
355  {
356  TensorShape shape_gemm;
357 
358  // Calculate GEMM output shape
359  shape_gemm = _im2col_output.info()->tensor_shape();
360  shape_gemm.set(0, mat_weights_cols);
361  shape_gemm.set(1, conv_w * conv_h);
362 
363  // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
364  TensorInfo info_gemm(shape_gemm, 1, output_data_type);
365  info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
366  _gemm_output.allocator()->init(info_gemm);
367  _gemm_output_3d.allocator()->init(info_gemm);
368  _memory_group.manage(&_gemm_output);
369 
370  // Update GEMM output
371  gemm_output_to_use = &_gemm_output;
372  }
373  else
374  {
375  TensorInfo out_info{ *output->info() };
376  out_info.set_data_type(output_data_type).set_data_layout(input->info()->data_layout());
377  _gemm_output.allocator()->init(out_info);
378  _gemm_output_3d.allocator()->init(out_info);
379  _memory_group.manage(&_gemm_output);
380 
381  // Update GEMM output
382  gemm_output_to_use = &_gemm_output_3d;
383  }
384 
385  // Configure GEMM
386  // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix
387  const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0;
388  configure_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, act_info, gemm_3d_depth);
389 
390  if(!_skip_im2col)
391  {
392  _im2col_output.allocator()->allocate();
393  }
394 
395  if(!_skip_col2im)
396  {
397  if(_data_layout == DataLayout::NCHW)
398  {
399  // Configure col2im
400  _col2im_kernel = std::make_unique<NECol2ImKernel>();
401  _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
402  }
403  else
404  {
405  // Configure reshape layer
406  _reshape_layer.configure(gemm_output_to_use, output);
407  }
408  }
409  else
410  {
411  // Configure reshape layer
412  _reshape_layer.configure(gemm_output_to_use, output);
413  }
414 
415  if(_is_quantized && !_skip_col2im)
416  {
417  _tmp_output.allocator()->allocate();
418  }
419 
420  _gemm_output.allocator()->allocate();
421 
422  ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
423  "Output shape does not match the expected one");
424 }
425 
// Static validation mirror of configure(): rebuilds all intermediate TensorInfos
// (im2col output, reshaped weights, GEMM output) and validates each stage without
// allocating anything. Grouping is rejected (num_groups must be 1 on Neon).
426 Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
427  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
428 {
429  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
430  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
// NOTE(review): original lines 431-433 (presumably data-type / data-layout
// validation macros on input and weights) are missing from this extraction.
434  ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
435 
436  const DataLayout data_layout = input->data_layout();
437  const DataType data_type = input->data_type();
// NOTE(review): original lines 438-439 (presumably the idx_width/idx_height
// get_data_layout_dimension_index declarations used below) are missing here.
440  const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
441  const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
442 
443  const unsigned int kernel_width = weights->dimension(idx_width);
444  const unsigned int kernel_height = weights->dimension(idx_height);
445 
446  TensorInfo im2col_reshaped_info{};
447  TensorInfo info_gemm{};
448  TensorInfo tmp_info{};
449  TensorInfo weights_reshaped_info{};
450  const ITensorInfo *gemm_input_to_use = input;
451  const ITensorInfo *gemm_output_to_use = output;
452  const ITensorInfo *weights_to_use = weights;
453 
454  const bool append_bias = false;
455  const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
456  const bool is_bf16 = data_type == DataType::BFLOAT16;
// Same NHWC 1x1 stride-1 condition as in configure().
457  bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
458 
459  // Get convolved dimensions
460  unsigned int conv_w = 0;
461  unsigned int conv_h = 0;
462 
463  std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
464  input->dimension(idx_height),
465  kernel_width,
466  kernel_height,
467  conv_info,
468  dilation);
469 
470  // Check if GEMM3D is supported
471  bool skip_col2im = false;
472  if(data_layout == DataLayout::NHWC)
473  {
474  skip_col2im = bool(validate_gemm3d(input, weights, act_info, conv_h, true));
475  // If not supported, we need to perform im2col and col2im (or reshape layer)
476  if(!skip_col2im)
477  {
478  skip_im2col = false;
479  }
480  }
481 
482  if(skip_col2im)
483  {
484  // If not supported, we need to perform im2col and col2im (or reshape layer)
485  if(!bool(validate_gemm3d(input, weights, act_info, conv_h, skip_im2col)))
486  {
487  skip_im2col = false;
488  skip_col2im = false;
489  }
490  }
491 
492  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_channel) != input->dimension(idx_channel));
// NOTE(review): original line 493 (presumably a weights rank check) is missing here.
494 
495  // Validate biases
496  if(biases != nullptr)
497  {
498  if(is_quantized)
499  {
// NOTE(review): original line 500 (presumably the S32-bias data-type check for the
// quantized path) is missing from this extraction.
501  }
502  else if(is_bf16)
503  {
// NOTE(review): original line 504 (presumably the F32-bias check for BFLOAT16) is
// missing from this extraction.
505  }
506  else
507  {
// NOTE(review): original line 508 (presumably a mismatching-data-types check between
// input and biases) is missing from this extraction.
509  }
510  ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
// NOTE(review): original line 511 (presumably a bias rank check) is missing here.
512  }
513 
514  unsigned int mat_weights_cols = weights->dimension(idx_kernels);
515  unsigned int mat_weights_rows = weights->dimension(idx_width) * weights->dimension(idx_height) * weights->dimension(idx_channel);
516 
517  // Output tensor auto inizialization if not yet initialized
519  weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type);
520  weights_reshaped_info.set_quantization_info(weights->quantization_info());
521  weights_to_use = &weights_reshaped_info;
522 
523  if(!skip_im2col)
524  {
525  // Create tensor info for im2col reshaped inputs
526  // For Neon the batch size is on the fourth dimension
527  // TODO (giaiod01): Auto-initialize the output shape of im2col COMPMID-1482
528  TensorShape shape_im2col = input->tensor_shape();
529  shape_im2col.set(0, mat_weights_rows);
530  shape_im2col.set(1, conv_w * conv_h);
531  shape_im2col.set(2, 1);
532 
533  im2col_reshaped_info = TensorInfo(shape_im2col, 1, data_type);
534  im2col_reshaped_info.set_quantization_info(input->quantization_info());
535 
536  ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation));
537  gemm_input_to_use = &im2col_reshaped_info;
538  }
539 
540  // Create temporary GEMM output tensor in case we cannot skip col2im
541  const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
542  if(!skip_col2im)
543  {
544  TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
545  shape_gemm.set(0, mat_weights_cols);
546  shape_gemm.set(1, conv_w * conv_h);
547  info_gemm = TensorInfo(shape_gemm, 1, output_data_type);
548  }
549  else
550  {
551  info_gemm = TensorInfo(output->tensor_shape(), 1, output_data_type);
552  }
554  gemm_output_to_use = &info_gemm;
555  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, act_info, skip_col2im ? conv_h : 0, skip_im2col));
556 
557  // Validate Col2Im/ReshapeLayer
558  if(!skip_col2im && (data_layout == DataLayout::NCHW))
559  {
560  ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
561  }
562 
563  return Status{};
564 }
565 
// NOTE(review): the signature (original line 566, presumably
// void NEGEMMConvolutionLayer::run()) is missing from this extraction.
// Executes: prepare -> (optional) im2col -> GEMM/GEMMLowp -> (optional) col2im/reshape.
567 {
568  prepare();
569 
570  MemoryGroupResourceScope scope_mg(_memory_group);
571 
// When col2im is skipped but the output tensor carries top/bottom padding, GEMM
// cannot write into it directly; an extra reshape copy is performed at the end.
572  bool out_has_padding = _skip_col2im && (_original_output->info()->padding().bottom != 0 || _original_output->info()->padding().top != 0);
573 
574  if(!_skip_im2col)
575  {
576  // Run input reshaping
577  unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
578  NEScheduler::get().schedule(_im2col_kernel.get(), y_dim);
579  }
580 
581  // Handle the case where output has top/bottom padding
// Alias the final output buffer (or the managed _gemm_output fallback) as the
// GEMM3D output via import_memory, so the MM stage writes in place.
582  const ITensor *out_to_use = out_has_padding ? &_gemm_output : _original_output;
583  _gemm_output_3d.info()->extend_padding(out_to_use->info()->padding());
584  _gemm_output_3d.allocator()->import_memory(out_to_use->buffer());
585 
586  // Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions
587  if(_is_quantized)
588  {
589  // Run gemmlowp
590  _mm_gemmlowp.run();
591  }
592  else
593  {
594  // Run gemm
595  _mm_gemm.run();
596  }
597 
598  // Reshape output matrix
599  if(!_skip_col2im)
600  {
601  if(_data_layout == DataLayout::NCHW)
602  {
603  NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY);
604  }
605  else
606  {
607  _reshape_layer.run();
608  }
609  }
610  else if(out_has_padding)
611  {
612  _reshape_layer.run();
613  }
614 
// Release the imported alias; the underlying buffer is owned elsewhere.
615  _gemm_output_3d.allocator()->free();
616 }
617 
// NOTE(review): the signature (original line 618, presumably
// void NEGEMMConvolutionLayer::prepare()) is missing from this extraction.
// One-time preparation: reshape weights (via the weights manager when available),
// prepare the GEMM/GEMMLowp stage, then release the reshaped weights if unused.
619 {
620  if(!_is_prepared)
621  {
622  if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
623  {
624  _weights_manager->run(_original_weights, &_reshape_weights_managed);
625  }
626  else
627  {
628  // Run weights reshaping and mark original weights tensor as unused
629  _weights_reshaped.allocator()->allocate();
630  _reshape_weights.run();
631  _original_weights->mark_as_unused();
632  }
633 
634  // Prepare GEMM
635  _is_quantized ? _mm_gemmlowp.prepare() : _mm_gemm.prepare();
636  if(!_weights_reshaped.is_used())
637  {
638  _weights_reshaped.allocator()->free();
639  }
640 
641  _is_prepared = true;
642  }
643 }
644 } // namespace arm_compute
unsigned int top
top of the border
Definition: Types.h:375
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
void prepare() override
Prepare the function for executing.
Shape of a tensor.
Definition: TensorShape.h:39
Quantize using a fixed point multiplication.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
Definition: Validate.h:494
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be differen...
bool enabled() const
Check if initialised.
Definition: Types.h:1600
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
Definition: ITensor.cpp:163
bool are_reshaped() const
Flag which specifies if the weights tensor has been reshaped.
Definition: Types.h:1789
1 channel, 1 F32 per channel
const DataLayout data_layout
Definition: Im2Col.cpp:151
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Quantization info when assuming per layer quantization.
unsigned int bottom
bottom of the border
Definition: Types.h:377
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
Definition: Types.h:1955
Status class.
Definition: Error.h:52
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
Definition: Types.h:1959
~NEGEMMConvolutionLayer()
Default destructor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Activation Layer Information class.
Definition: Types.h:1550
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Definition: Types.h:1954
Interface for Neon tensor.
Definition: ITensor.h:36
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEGEMMConvolutionLayer.
Copyright (c) 2017-2021 Arm Limited.
static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEWeightsReshapeKernel.
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
Definition: Utils.cpp:419
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
Definition: TensorInfo.cpp:380
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: Tensor.cpp:48
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: Tensor.cpp:33
bool is_quantized_per_channel
GEMMLowp quantized per-channel flag.
Definition: Types.h:1963
Convolution Layer Weights Information class.
Definition: Types.h:1765
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
void mark_as_unused() const
Marks a tensor as unused.
Definition: ITensor.cpp:168
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEConvolutionLayerReshap...
16-bit brain floating-point number
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
const DataType data_type
Definition: Im2Col.cpp:150
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
Definition: NEGEMM.cpp:190
Quantization information.
void run() override
Run the kernels contained in the function.
Definition: NEGEMM.cpp:309
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: Utils.h:1245
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
Definition: Utils.cpp:483
Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, const QuantizationInfo &wq_info, const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info)
Calculate quantized representation of per-channel multipliers.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
const unsigned int num_groups
Definition: Im2Col.cpp:153
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:770
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
Definition: Types.h:1952
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
Definition: Types.h:722
virtual PaddingSize padding() const =0
Padding of tensor.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void free() override
Free allocated CPU memory.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
Static function to check if given info will lead to a valid configuration of NECol2ImKernel.
Weights manager interface to handle weights transformations.
virtual ITensorInfo & set_data_type(DataType data_type)=0
Set the data type to the specified value.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
Num samples, channels, height, width.
src_info set_data_layout(data_layout)
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1190
quantized, symmetric per channel fixed-point 8-bit number
__constant DATA_TYPE16 type_min
Definition: minmaxloc.cl:46
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
NEGEMMConvolutionLayer(const std::shared_ptr< IMemoryManager > &memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
void run() override
Run the kernels contained in the function.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), unsigned int num_groups=1)
Set the input and output tensors.
TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias=false, unsigned int num_groups=1)
Calculate the reshaped shape of the weights.
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
Num samples, height, width, channels.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
Definition: NEGEMM.cpp:72
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
void prepare() override
Prepare the function for executing.
Definition: NEGEMM.cpp:359
__constant DATA_TYPE16 type_max
Definition: minmaxloc.cl:47
Status import_memory(void *memory)
Import an existing memory as a tensor's backing memory.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and outputs.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor's metadata.
Definition: TensorInfo.h:45
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
GEMM information class.
Definition: Types.h:2003
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
Definition: Types.h:1585
quantized, asymmetric fixed-point 8-bit number signed
void prepare() override
Prepare the function for executing.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
Definition: Types.h:1958
void configure(const ITensor *weights, const ITensor *biases, ITensor *output)
Set the input and output tensors.
DataType
Available data types.
Definition: Types.h:77
DataLayout
[DataLayout enum definition]
Definition: Types.h:120
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEIm2ColKernel.
~NEConvolutionLayerReshapeWeights()
Default destructor.
virtual bool extend_padding(const PaddingSize &padding)=0
Update the offset to the first element, the strides and the total size.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
Definition: Utils.h:564
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
void run() override
Run the kernels contained in the function.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:94