Compute Library 22.05
ClGemmConv2d.cpp
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "arm_compute/core/Utils.h"

#include "src/common/utils/Log.h"
#include "support/Cast.h"

namespace arm_compute
{
using namespace experimental;
using namespace misc::shape_calculator;
using namespace utils::cast;
namespace opencl
{
ClGemmConv2d::ClGemmConv2d()
    : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr), _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(),
      _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _use_post_ops(false), _aux_mem(AuxTensorIdx::Count)
{
}
ClGemmConv2d::~ClGemmConv2d() = default;

void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
                                const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                int gemm_3d_depth, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights);
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         _skip_im2col,          // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fast_math
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info,              // activation_info
                                         post_ops               // post ops
    );

    TensorInfo tmp_src{ *src };
    if(_is_quantized)
    {
        ARM_COMPUTE_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops");
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = src->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        tmp_src.set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
        _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases, dst, gemm_info);

        // Revert back the QuantizationInfo as weights could be used in other convolution layers
        weights->set_quantization_info(weights_quantization_info);

        auto mm_mem_req = _mm_gemmlowp->workspace();
        for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
        {
            _aux_mem[cont] = mm_mem_req[cont];
        }
    }
    else
    {
        // Configure matrix multiply function
        _mm_gemm = std::make_unique<ClGemm>();
        _mm_gemm->configure(compile_context, &tmp_src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
        auto mm_mem_req = _mm_gemm->workspace();
        for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
        {
            _aux_mem[cont] = mm_mem_req[cont];
        }
    }
}
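
/* A sketch of the algebra behind the offset negation above, assuming the
 * gemmlowp convention that operand offsets are *added* before accumulation:
 * a uniformly quantized value dequantizes as real = scale * (q - offset), so
 * passing QuantizationInfo(scale, -offset) makes the core accumulate
 * sum_k (q_a[k] - a_offset) * (q_w[k] - w_offset), which matches the
 * real-valued convolution up to the fixed-point output rescaling stage. */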

Status ClGemmConv2d::validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
                                 const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops)
{
    const bool is_quantized = is_data_type_quantized_asymmetric(src->data_type());

    const GEMMInfo &gemm_info = GEMMInfo(false,                 // is_a_reshaped
                                         false,                 // is_b_reshaped
                                         true,                  // reshape_b_only_on_first_run
                                         gemm_3d_depth,         // depth_output_gemm3d
                                         skip_im2col,           // reinterpret_input_as_3d
                                         false,                 // retain_internal_weights
                                         gemmlowp_output_stage, // gemmlowp_output_stage
                                         false,                 // fast_math
                                         false,                 // fp_mixed_precision
                                         true,                  // broadcast_bias
                                         act_info,              // activation_info
                                         post_ops               // post ops
    );

    if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops");
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = src->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->quantization_info();

        std::unique_ptr<ITensorInfo> src_qa     = src->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        src_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Perform validation step on GEMMLowp
        return ClGemmLowpMatrixMultiplyCore::validate(src_qa.get(), weights_qa.get(), biases, dst, gemm_info);
    }
    else
    {
        // Perform validation step on Matrix multiply function
        return ClGemm::validate(src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
    }
}

void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
                             const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);

    ARM_COMPUTE_ERROR_THROW_ON(ClGemmConv2d::validate(src, weights, biases, dst,
                                                      conv2d_info,
                                                      weights_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);

    const DataType   data_type   = src->data_type();
    const DataLayout data_layout = src->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
    const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();

    _is_prepared  = weights_info.retain_internal_weights();
    _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
    _skip_im2col  = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1 && conv2d_info.conv_info.stride().second == 1);
    _skip_col2im  = data_layout == DataLayout::NHWC;
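
    // Why these two flags hold: a 1x1, stride-1 NHWC convolution already reads
    // its input as a [channels x (W * H)] matrix, so im2col would only copy data;
    // and in NHWC the GEMM can write the destination layout directly via GEMM3D
    // (see gemm_3d_depth below), so col2im is never needed in that layout.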

    // Only in the quantized case are there a few scenarios where we cannot fuse the activation function into GEMM
    _fuse_activation = true;
    _use_post_ops    = conv2d_info.post_ops.size() > 0;

    const ITensorInfo *gemm_input_to_use  = src;
    ITensorInfo       *gemm_output_to_use = dst;

    // Get parameters from conv_info
    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv2d_info.conv_info.stride();

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width),
                                                 src->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv2d_info.conv_info,
                                                 conv2d_info.dilation);

    unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;
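
    // Shape bookkeeping for the GEMM (per group): the im2col output contributes
    // rows of length kernel_w * kernel_h * channels (+1 when the bias is
    // appended), the reshaped weights contribute mat_weights_cols columns, and
    // the product is [mat_weights_cols x (conv_w * conv_h)] (see shape_gemm below).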

    ITensorInfo *biases_to_use = biases;
    _append_bias               = false;

    _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>();
    if(conv2d_info.num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used with the NCHW data layout
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use = nullptr;
        _append_bias  = true;
        _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped, conv2d_info.num_groups);
    }
    else
    {
        _weights_reshape_kernel->configure(compile_context, weights, nullptr, &_weights_reshaped, conv2d_info.num_groups);
    }
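
    // When _append_bias is set, the reshaped weights carry one extra element per
    // column holding the bias value, and im2col (configured below with the same
    // flag) appends a matching constant to each unrolled patch, so the GEMM
    // produces W * x + b in a single pass without a separate bias addition.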

    // Create tensor to store im2col reshaped inputs
    if(!_skip_im2col)
    {
        // Configure and tune im2col. im2col output shape is auto-initialized
        _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>();

        // Set the GPU target for im2col
        _im2col_kernel->set_target(CLScheduler::get().target());
        _im2col_kernel->configure(compile_context, src, &_im2col_output, Size2D(kernel_width, kernel_height), conv2d_info.conv_info, _append_bias, conv2d_info.dilation, conv2d_info.num_groups);

        // Set quantization info
        _im2col_output.set_quantization_info(src->quantization_info());
        CLScheduler::get().tune_kernel_static(*_im2col_kernel);

        // Update GEMM input
        gemm_input_to_use = &_im2col_output;
    }
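
    // im2col in one sentence: every kernel_w x kernel_h x channels input patch
    // visited by the sliding window is unrolled into one row/column of
    // _im2col_output, turning the convolution into the plain matrix multiply
    // that configure_mm() sets up below.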

    // Create GEMM output tensor
    if(!_skip_col2im)
    {
        TensorShape shape_gemm;

        // If we cannot skip col2im it means we run im2col as well
        shape_gemm = _im2col_output.tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        _gemm_output = TensorInfo(shape_gemm, 1, data_type);
        _gemm_output.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout());

        // Update GEMM output
        gemm_output_to_use = &_gemm_output;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type            = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset = 0;

    // Configure output stage for quantized case
    if(_is_quantized)
    {
        const auto         output_quant_info        = (dst->total_size() == 0) ? iq_info : oq_info;
        const bool         is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());
        const unsigned int num_filters              = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(src, weights, dst,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];
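
        // What the multiplier/shift pairs encode (one pair per filter when
        // quantizing per channel): the real rescale factor
        // r = (src_scale * weights_scale) / dst_scale is decomposed as
        // r ~= gemmlowp_multiplier * 2^(-gemmlowp_shift), with the multiplier a
        // 32-bit fixed-point value, so the output stage needs only integer ops.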

        PixelValue min_val{};
        PixelValue max_val{};
        std::tie(min_val, max_val) = get_min_max(dst->data_type());

        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(conv2d_info.act_info.enabled())
        {
            if(supported_acts.count(conv2d_info.act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info);
            }
            else
            {
                _fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // Configure and tune GEMM
    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
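
    // GEMM3D means the [mat_weights_cols x (conv_w * conv_h)] result is written
    // as a 3D tensor of depth conv_h, i.e. [mat_weights_cols x conv_w x conv_h],
    // which is already the NHWC destination layout; the matching
    // reinterpret_input_as_3d flag is driven by _skip_im2col inside configure_mm().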

    configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info, conv2d_info.post_ops);

    if(!_skip_col2im)
    {
        ARM_COMPUTE_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClGemmConv2d does not support post ops with col2im operation"); // Post ops must be performed after every other op
        // Set the GPU target for col2im
        _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
        _col2im_kernel->set_target(CLScheduler::get().target());
        // Configure and tune Col2Im
        _col2im_kernel->configure(compile_context, gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups);
        CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
    }

    ARM_COMPUTE_ERROR_ON_MSG((dst->dimension(idx_width) != conv_w) || (dst->dimension(idx_height) != conv_h),
                             "Output shape does not match the expected one");

    // Disable running of activation kernel if post ops are used
    if(!_fuse_activation && !_use_post_ops)
    {
        _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>();
        _activation_kernel->configure(compile_context, dst, nullptr, conv2d_info.act_info);
    }

    _aux_mem[Im2ColOutput]    = MemoryInfo(offset_int_vec(Im2ColOutput), MemoryLifetime::Temporary, _im2col_output.total_size());
    _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), MemoryLifetime::Persistent, _weights_reshaped.total_size());
    _aux_mem[GemmOutput]      = MemoryInfo(offset_int_vec(GemmOutput), MemoryLifetime::Temporary, _gemm_output.total_size());
}

Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                              const WeightsInfo &weights_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());

    if(!is_quantized_per_channel)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
    ARM_COMPUTE_RETURN_ERROR_ON(((src->dimension(2) / weights->dimension(2)) != conv2d_info.num_groups) && (src->data_layout() == DataLayout::NCHW));

    const DataLayout data_layout = src->data_layout();
    const DataType   data_type   = src->data_type();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    TensorInfo         im2col_reshaped_info{};
    TensorInfo         info_gemm{};
    TensorInfo         weights_reshaped_info{};
    const ITensorInfo *gemm_input_to_use  = src;
    const ITensorInfo *gemm_output_to_use = dst;
    const ITensorInfo *weights_to_use     = weights;
    const bool         is_quantized       = is_data_type_quantized_asymmetric(data_type);
    const bool         skip_im2col        = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1
                                             && conv2d_info.conv_info.stride().second == 1);
    const bool         skip_col2im        = data_layout == DataLayout::NHWC;
    bool               fuse_activation    = true;
    bool               use_post_ops       = conv2d_info.post_ops.size() > 0;

    ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * conv2d_info.num_groups) != src->dimension(idx_channel));
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((!skip_im2col || !skip_col2im)
                                    && conv2d_info.post_ops.size() > 0,
                                    "ClGemmConv2d does not support post ops with col2im or im2col operation"); // Post ops must be performed after every other op

    // Validate biases
    if(biases != nullptr)
    {
        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(conv2d_info.act_info.enabled())
    {
        ARM_COMPUTE_ERROR_ON(conv2d_info.act_info.b() > conv2d_info.act_info.a());
    }

    // Get convolved dimensions
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;

    std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width),
                                                 src->dimension(idx_height),
                                                 kernel_width,
                                                 kernel_height,
                                                 conv2d_info.conv_info,
                                                 conv2d_info.dilation);

    unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;

    const ITensorInfo *biases_to_use = biases;
    bool               append_bias   = false;

    if(conv2d_info.num_groups != 1 && biases != nullptr)
    {
        // num_groups != 1 can only be used with the NCHW data layout
        // Since a utility function to reshape the biases is missing, we append the biases to the weights tensor
        biases_to_use         = nullptr;
        append_bias           = true;
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, conv2d_info.num_groups), 1, data_type);
    }
    else
    {
        weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, conv2d_info.num_groups), 1, data_type);
    }

    weights_to_use = &weights_reshaped_info;

    if(!skip_im2col)
    {
        const Size2D kernel_dims(kernel_width, kernel_height);

        // Output tensor auto initialization if not yet initialized
        TensorShape expected_output_shape = compute_im2col_conv_shape(src, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups == 1, conv2d_info.num_groups);

        auto_init_if_empty(im2col_reshaped_info, src->clone()->set_tensor_shape(expected_output_shape));

        ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClIm2ColKernel::validate(src, &im2col_reshaped_info, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups));
        gemm_input_to_use = &im2col_reshaped_info;
    }

    // Create GEMM output tensor
    if(!skip_col2im)
    {
        TensorShape shape_gemm;

        shape_gemm = gemm_input_to_use->tensor_shape();
        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);

        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        info_gemm.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout());
        gemm_output_to_use = &info_gemm;
    }

    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    gemmlowp_output_stage.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset          = 0;
    gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
        const auto                    output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info;
        const unsigned int            num_filters       = (is_quantized_per_channel) ? num_kernels : 1;

        gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
        gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
        quantization::compute_quantized_multipliers_and_shifts(src, weights, dst,
                                                               gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                               gemmlowp_output_stage.gemmlowp_shifts.data());
        gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
        gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];

        int min_activation = 0;
        int max_activation = 0;

        const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
                                                                                   ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
                                                                                 };

        if(conv2d_info.act_info.enabled())
        {
            if(supported_acts.count(conv2d_info.act_info.activation()) != 0)
            {
                std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info);
            }
            else
            {
                fuse_activation = false;
            }
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset    = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
        gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
    }

    // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;

    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info,
                                            conv2d_info.post_ops));

    // Validate Col2Im
    if(!skip_col2im)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClCol2ImKernel::validate(gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups));
    }

    // Validate Activation Layer
    // Disable running (and thus validation) of the activation kernel if post ops are used
    if(!fuse_activation && !use_post_ops)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, nullptr, conv2d_info.act_info));
    }

    return Status{};
}

void ClGemmConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);

    auto src    = tensors.get_const_tensor(ACL_SRC_0);
    auto biases = tensors.get_const_tensor(ACL_SRC_2);
    auto dst    = tensors.get_tensor(ACL_DST);
    auto gemm_input_to_use  = src;
    auto gemm_output_to_use = dst;

    CLAuxTensorHandler im2col_output(offset_int_vec(Im2ColOutput), _im2col_output, tensors, false);
    CLAuxTensorHandler gemm_output(offset_int_vec(GemmOutput), _gemm_output, tensors, false);
    CLAuxTensorHandler weights_reshaped(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors, false);
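
    // Each CLAuxTensorHandler binds one workspace buffer advertised in _aux_mem
    // (looked up in `tensors` by its offset_int_vec() id) to the corresponding
    // TensorInfo, so the im2col, GEMM and reshaped-weights intermediates live in
    // caller-provided memory rather than being owned by this operator.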

    // Run im2col
    if(!_skip_im2col)
    {
        ITensorPack pack =
        {
            { TensorType::ACL_SRC, src },
            { TensorType::ACL_DST, im2col_output.get() }
        };
        CLScheduler::get().enqueue_op(*_im2col_kernel, pack, false);
        gemm_input_to_use = im2col_output.get();
    }
    if(!_skip_col2im)
    {
        gemm_output_to_use = gemm_output.get();
    }
    ITensorPack pack_mm = tensors;
    pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
    pack_mm.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
    if(!_append_bias)
    {
        pack_mm.add_const_tensor(TensorType::ACL_SRC_2, biases);
    }
    pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
    // Run the ClGemm or ClGemmLowpMatrixMultiplyCore function
    if(_is_quantized)
    {
        // Run gemmlowp
        _mm_gemmlowp->run(pack_mm);
    }
    else
    {
        // Run gemm
        _mm_gemm->run(pack_mm);
    }

    // Reshape output matrix
    if(!_skip_col2im)
    {
        ITensorPack pack =
        {
            { TensorType::ACL_SRC, gemm_output_to_use },
            { TensorType::ACL_DST, dst }
        };
        CLScheduler::get().enqueue_op(*_col2im_kernel.get(), pack, false);
    }

    // Run Activation Layer if we cannot fuse it in GEMM
    // Disable running of activation kernel if post ops are used
    if(!_fuse_activation && !_use_post_ops)
    {
        ITensorPack pack =
        {
            { TensorType::ACL_SRC, dst },
            { TensorType::ACL_DST, dst }
        };
        CLScheduler::get().enqueue_op(*_activation_kernel.get(), pack, false);
    }
}

void ClGemmConv2d::prepare(ITensorPack &tensors)
{
    if(!_is_prepared)
    {
        // Run weights reshaping and mark original weights tensor as unused
        ICLTensor *weights_reshaped_p = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(WeightsReshaped)));
        CLAuxTensorHandler weights_reshaped(_weights_reshaped, *weights_reshaped_p);
        auto        weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
        ITensorPack pack    =
        {
            { TensorType::ACL_SRC, weights },
            { TensorType::ACL_DST, weights_reshaped.get() }
        };

        if(_append_bias)
        {
            const auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
            pack.add_const_tensor(TensorType::ACL_BIAS, biases);
        }
        CLScheduler::get().enqueue_op(*_weights_reshape_kernel.get(), pack, true);
        tensors.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());

        // Prepare GEMM
        _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
        _is_prepared = true;
    }
}

experimental::MemoryRequirements ClGemmConv2d::workspace() const
{
    return _aux_mem;
}
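
/* How a caller typically consumes workspace() (a sketch, not code from this
 * file): iterate the returned MemoryRequirements, allocate a CL tensor of each
 * entry's size, and add it to the run-time ITensorPack under the entry's slot
 * id so that the CLAuxTensorHandler lookups in run() and prepare() find it. */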
} // namespace opencl
} // namespace arm_compute
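
/* A minimal end-to-end usage sketch (hypothetical shapes; assumes the CL
 * backend has been initialised, e.g. CLScheduler::get().default_init(), and
 * that auxiliary tensors are provided as described above workspace()):
 *
 *   using namespace arm_compute;
 *
 *   // NHWC F32: a 32x32 image with 16 channels, eight 1x1 kernels.
 *   // For NHWC, TensorShape is ordered [C, W, H, N].
 *   TensorInfo src(TensorShape(16U, 32U, 32U, 1U), 1, DataType::F32);
 *   TensorInfo weights(TensorShape(16U, 1U, 1U, 8U), 1, DataType::F32);
 *   TensorInfo biases(TensorShape(8U), 1, DataType::F32);
 *   TensorInfo dst(TensorShape(8U, 32U, 32U, 1U), 1, DataType::F32);
 *   src.set_data_layout(DataLayout::NHWC);
 *   weights.set_data_layout(DataLayout::NHWC);
 *   dst.set_data_layout(DataLayout::NHWC);
 *
 *   const Conv2dInfo conv2d_info(PadStrideInfo(1, 1, 0, 0), Size2D(1U, 1U),
 *                                ActivationLayerInfo(), false, 1);
 *
 *   opencl::ClGemmConv2d conv;
 *   ARM_COMPUTE_ERROR_THROW_ON(
 *       opencl::ClGemmConv2d::validate(&src, &weights, &biases, &dst, conv2d_info));
 *   conv.configure(CLKernelLibrary::get().get_compile_context(),
 *                  &src, &weights, &biases, &dst, conv2d_info);
 *
 *   // At run time, bind backing CL tensors in an ITensorPack
 *   // (ACL_SRC_0/1/2 and ACL_DST, plus one entry per workspace()
 *   // requirement), then call conv.run(pack).
 */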