Compute Library
 23.08
ClGemmConv2d.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
30 #include "arm_compute/core/Utils.h"
44 
45 #include "src/common/utils/Log.h"
46 #include "support/Cast.h"
47 
48 namespace arm_compute
49 {
50 using namespace experimental;
51 using namespace misc::shape_calculator;
52 using namespace utils::cast;
53 namespace opencl
54 {
// Member-initializer list and empty body of a constructor whose signature line is not visible in this listing.
// All kernel/function pointers start as nullptr, every flag starts false except _fuse_activation (true),
// and _aux_mem is constructed from AuxTensorIdx::Count.
56  : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr), _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(),
57  _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _use_post_ops(false), _aux_mem(AuxTensorIdx::Count)
58 {
59 }
// Defaulted destructor, defined out of line in this translation unit.
60 ClGemmConv2d::~ClGemmConv2d() = default;
61 
// Configures the matrix-multiply stage of the convolution.
// When _is_quantized is set, a ClGemmLowpMatrixMultiplyCore is created and configured with negated
// input/weights quantization offsets; otherwise a ClGemm is created and configured.
// In both cases the function's workspace() entries are copied into the leading slots of _aux_mem.
// NOTE(review): the end of the parameter list and the first statement after the opening brace are not
// visible in this listing; gemm_3d_depth, act_info and post_ops are used below and are presumably declared there.
62 void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
63  const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
65 {
// NOTE(review): post_ops is not passed to validate_mm() here although it is placed into gemm_info below,
// so the validation call sees a different GEMM description than the configure call — confirm this is intended.
67  ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));
68 
69  const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
70  false, // is_b_reshaped
71  true, // reshape_b_only_on_first_run
72  gemm_3d_depth, // depth_output_gemm3d
73  _skip_im2col, // reinterpret_input_as_3d
74  false, // retain_internal_weights
75  gemmlowp_output_stage, // gemmlowp_output_stage
76  false, // fast_math
77  false, // fp_mixed_precision
78  true, // broadcast_bias
79  act_info, // activation_info
80  post_ops // post ops
81  );
82 
// Work on a local copy of the source info so the caller's src is never modified.
83  TensorInfo tmp_src{ *src };
84  if(_is_quantized)
85  {
86  ARM_COMPUTE_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops");
87  // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
88  // Extract and negate input and weights offset
89  const QuantizationInfo input_quantization_info = src->quantization_info();
90  const QuantizationInfo weights_quantization_info = weights->quantization_info();
91 
// The weights info is modified in place here and restored right after configure() below.
92  tmp_src.set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
93  weights->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
94 
95  _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
96  _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases, dst, gemm_info);
97 
98  // Revert back QuantizationInfo as weights could be used in other convolution layers
99  weights->set_quantization_info(weights_quantization_info);
100 
// Copy the GEMMLowp workspace requirements into the first mm_mem_req.size() slots of _aux_mem.
101  auto mm_mem_req = _mm_gemmlowp->workspace();
102  for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
103  {
104  _aux_mem[cont] = mm_mem_req[cont];
105  }
106  }
107  else
108  {
109  // Configure matrix multiply function
110  _mm_gemm = std::make_unique<ClGemm>();
111  _mm_gemm->configure(compile_context, &tmp_src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
// Copy the GEMM workspace requirements into the first mm_mem_req.size() slots of _aux_mem.
112  auto mm_mem_req = _mm_gemm->workspace();
113  for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
114  {
115  _aux_mem[cont] = mm_mem_req[cont];
116  }
117  }
118 }
119 
120 Status ClGemmConv2d::validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
121  const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops)
122 {
123  const bool is_quantized = is_data_type_quantized_asymmetric(src->data_type());
124 
125  const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
126  false, // is_b_reshaped
127  true, // reshape_b_only_on_first_run
128  gemm_3d_depth, // depth_output_gemm3d
129  skip_im2col, // reinterpret_input_as_3d
130  false, // retain_internal_weights
131  gemmlowp_output_stage, // gemmlowp_output_stage
132  false, // fast_math
133  false, // fp_mixed_precision
134  true, // broadcast_bias
135  act_info, // activation_info
136  post_ops // post ops
137  );
138 
139  if(is_quantized)
140  {
141  ARM_COMPUTE_RETURN_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops");
142  // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
143  // Extract and negate input and weights offset
144  const QuantizationInfo input_quantization_info = src->quantization_info();
145  const QuantizationInfo weights_quantization_info = weights->quantization_info();
146 
147  std::unique_ptr<ITensorInfo> src_qa = src->clone();
148  std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
149  src_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
150  weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
151 
152  // Perform validation step on GEMMLowp
153  return ClGemmLowpMatrixMultiplyCore::validate(src_qa.get(), weights_qa.get(), biases, dst, gemm_info);
154  }
155  else
156  {
157  // Perform validation step on Matrix multiply function
158  return ClGemm::validate(src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
159  }
160 }
161 
// Convolution configure step (the first line of the signature and several statements are not visible
// in this listing). Visible steps, in order:
//  1. derive _is_prepared, _is_quantized, _skip_im2col, _skip_col2im, _fuse_activation, _use_post_ops;
//  2. configure the weights-reshape kernel (appending the biases when num_groups != 1 and biases are given);
//  3. configure im2col unless it is skipped;
//  4. build the intermediate GEMM output info unless col2im is skipped;
//  5. fill the GEMMLowp output stage for quantized inputs;
//  6. configure the GEMM through configure_mm();
//  7. configure col2im and, if the activation could not be fused, a standalone activation kernel;
//  8. record the auxiliary memory requirements in _aux_mem.
163  const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info)
164 {
166 
168  conv2d_info,
169  weights_info));
170  ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);
171 
172  const DataType data_type = src->data_type();
173  const DataLayout data_layout = src->data_layout();
// NOTE(review): idx_width, idx_height and idx_kernels are used below but their declarations are not
// visible in this listing.
177 
178  const unsigned int kernel_width = weights->dimension(idx_width);
179  const unsigned int kernel_height = weights->dimension(idx_height);
180  const unsigned int num_kernels = weights->dimension(idx_kernels);
181 
182  const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
183  const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
184 
// prepare() only does work while _is_prepared is false, so retaining internal weights skips it.
185  _is_prepared = weights_info.retain_internal_weights();
186  _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
// im2col is skipped only for NHWC 1x1 kernels with unit stride; col2im is skipped for every NHWC input.
187  _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1 && conv2d_info.conv_info.stride().second == 1);
188  _skip_col2im = data_layout == DataLayout::NHWC;
189 
190  // Only for quantized types there are a few cases where we cannot fuse the activation function in GEMM
191  _fuse_activation = true;
192  _use_post_ops = conv2d_info.post_ops.size() > 0;
193 
194  const ITensorInfo *gemm_input_to_use = src;
195  ITensorInfo *gemm_output_to_use = dst;
196 
197  // Get parameters from conv_info
198  unsigned int stride_x = 0;
199  unsigned int stride_y = 0;
200  std::tie(stride_x, stride_y) = conv2d_info.conv_info.stride();
201 
202  // Get convolved dimensions
203  unsigned int conv_w = 0;
204  unsigned int conv_h = 0;
205  std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width),
206  src->dimension(idx_height),
207  kernel_width,
208  kernel_height,
209  conv2d_info.conv_info,
210  conv2d_info.dilation);
211 
212  unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;
213 
214  ITensorInfo *biases_to_use = biases;
215  _append_bias = false;
216 
217  _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>();
218  if(conv2d_info.num_groups != 1 && biases != nullptr)
219  {
220  // num_groups != 1 can only be for NCHW
221  // Since it is missing an utility function to reshape the biases, we append the biases into the weights tensor
222  biases_to_use = nullptr;
223  _append_bias = true;
224  _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped, conv2d_info.num_groups);
225  }
226  else
227  {
228  _weights_reshape_kernel->configure(compile_context, weights, nullptr, &_weights_reshaped, conv2d_info.num_groups);
229  }
230 
231  // Create tensor to store im2col reshaped inputs
232  if(!_skip_im2col)
233  {
234  // Configure and tune im2col. im2col output shape is auto-initialized
235  _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>();
236 
237  // Set the GPU target for im2col
238  _im2col_kernel->set_target(CLScheduler::get().target());
239  _im2col_kernel->configure(compile_context, src, &_im2col_output, Size2D(kernel_width, kernel_height), conv2d_info.conv_info, _append_bias, conv2d_info.dilation, conv2d_info.num_groups);
240 
241  // Set quantization info
242  _im2col_output.set_quantization_info(src->quantization_info());
243  CLScheduler::get().tune_kernel_static(*_im2col_kernel);
244 
245  // Update GEMM input
246  gemm_input_to_use = &_im2col_output;
247  }
248 
249  // Create GEMM output tensor
250  if(!_skip_col2im)
251  {
252  TensorShape shape_gemm;
253 
254  // If we cannot skip col2im it means we run im2col as well
255  shape_gemm = _im2col_output.tensor_shape();
256  shape_gemm.set(0, mat_weights_cols);
257  shape_gemm.set(1, conv_w * conv_h);
258 
259  _gemm_output = TensorInfo(shape_gemm, 1, data_type);
260  _gemm_output.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout());
261 
262  // Update GEMM output
263  gemm_output_to_use = &_gemm_output;
264  }
265 
266  GEMMLowpOutputStageInfo gemmlowp_output_stage;
268  gemmlowp_output_stage.gemmlowp_offset = 0;
269 
270  // Configure output stage for quantized case
271  if(_is_quantized)
272  {
// Fall back to the input quantization info when dst reports a total size of zero.
273  const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info;
274  const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());
275  const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
276 
277  gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;
278 
279  gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
280  gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
// NOTE(review): the first line of the call that fills the two vectors is not visible in this listing.
282  gemmlowp_output_stage.gemmlowp_multipliers.data(),
283  gemmlowp_output_stage.gemmlowp_shifts.data());
284  gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
285  gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0];
286 
// Default bounds come from get_min_max() for the destination data type.
287  PixelValue min_val{};
288  PixelValue max_val{};
289  std::tie(min_val, max_val) = get_min_max(dst->data_type());
290 
291  auto min_activation = min_val.get<int32_t>();
292  auto max_activation = max_val.get<int32_t>();
293 
// Activations in this set are folded into the output-stage bounds; any other enabled activation
// clears _fuse_activation.
294  const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
295  ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
296  ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
297  };
298 
299  if(conv2d_info.act_info.enabled())
300  {
301  if(supported_acts.count(conv2d_info.act_info.activation()) != 0)
302  {
303  std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info);
304  }
305  else
306  {
307  _fuse_activation = false;
308  }
309  }
310 
311  // Set the GEMMLowp output stage info
312  gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
313  gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
314  gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
315  }
316 
317  // Configure and tune GEMM
318  // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
319  const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
320 
321  configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info, conv2d_info.post_ops);
322 
323  if(!_skip_col2im)
324  {
325  ARM_COMPUTE_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClGemmConv2d does not support post ops with col2im operation"); // Post ops must be performed after every other op
326  // Set the GPU target for col2im
327  _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
328  _col2im_kernel->set_target(CLScheduler::get().target());
329  // Configure and tune Col2Im
330  _col2im_kernel->configure(compile_context, gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups);
331  CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
332  }
333 
334  ARM_COMPUTE_ERROR_ON_MSG((dst->dimension(idx_width) != conv_w) || (dst->dimension(idx_height) != conv_h),
335  "Output shape does not match the expected one");
336 
337  // Disable running of activation kernel if post ops are used
338  if(!_fuse_activation && !_use_post_ops)
339  {
// The activation is configured with dst as source and nullptr as destination.
340  _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>();
341  _activation_kernel->configure(compile_context, dst, nullptr, conv2d_info.act_info);
342  }
343 
// The im2col and GEMM outputs are registered as Temporary, the reshaped weights as Persistent.
344  _aux_mem[Im2ColOutput] = MemoryInfo(offset_int_vec(Im2ColOutput), MemoryLifetime::Temporary, _im2col_output.total_size());
345  _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), MemoryLifetime::Persistent, _weights_reshaped.total_size());
346  _aux_mem[GemmOutput] = MemoryInfo(offset_int_vec(GemmOutput), MemoryLifetime::Temporary, _gemm_output.total_size());
347 }
348 
// Static validation of a GEMM-based convolution. Checks the argument combination, mirrors the
// intermediate tensor infos that configure() builds (reshaped weights, im2col output, GEMM output),
// and validates the im2col, matrix-multiply and col2im stages on them.
// Returns an empty Status when every visible check passes, otherwise the first failing status.
// NOTE(review): several statements of this function are not visible in this listing (for example the
// declarations of idx_width / idx_height / idx_channel / idx_kernels and a number of check macros).
349 Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
350  const WeightsInfo &weights_info)
351 {
353  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
355  const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type());
356 
// NOTE(review): the body of this branch is not visible in this listing.
357  if(!is_quantized_per_channel)
358  {
360  }
// Grouped convolution is accepted only for NCHW and not for QASYMM8 sources.
362  ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
363  ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8");
364  ARM_COMPUTE_RETURN_ERROR_ON(((src->dimension(2) / weights->dimension(2)) != conv2d_info.num_groups) && (src->data_layout() == DataLayout::NCHW));
365 
366  const DataLayout data_layout = src->data_layout();
367  const DataType data_type = src->data_type();
372 
373  const unsigned int kernel_width = weights->dimension(idx_width);
374  const unsigned int kernel_height = weights->dimension(idx_height);
375  const unsigned int num_kernels = weights->dimension(idx_kernels);
376 
// Local stand-ins for the member tensor infos that configure() fills.
377  TensorInfo im2col_reshaped_info{};
378  TensorInfo info_gemm{};
379  TensorInfo weights_reshaped_info{};
380  const ITensorInfo *gemm_input_to_use = src;
381  const ITensorInfo *gemm_output_to_use = dst;
382  const ITensorInfo *weights_to_use = weights;
383  const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
384  const bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1
385  && conv2d_info.conv_info.stride().second == 1);
386  const bool skip_col2im = data_layout == DataLayout::NHWC;
387  bool fuse_activation = true;
388  bool use_post_ops = conv2d_info.post_ops.size() > 0;
389 
390  ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * conv2d_info.num_groups) != src->dimension(idx_channel));
// NOTE(review): the start of the following check is not visible in this listing.
393  && conv2d_info.post_ops.size() > 0,
394  "ClGemmConv2d does not support post ops with col2im or im2col operation"); // Post ops must be performed after every other op
395 
396  // Validate biases
397  if(biases != nullptr)
398  {
// NOTE(review): the bodies of both branches below are not visible in this listing.
399  if(is_quantized)
400  {
402  }
403  else
404  {
406  }
407  ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
409  }
410 
411  if(conv2d_info.act_info.enabled())
412  {
// NOTE(review): this is the throwing ARM_COMPUTE_ERROR_ON, unlike the RETURN_ERROR macros used
// everywhere else in this function — confirm whether a returned Status was intended.
413  ARM_COMPUTE_ERROR_ON(conv2d_info.act_info.b() > conv2d_info.act_info.a());
414  }
415 
416  // Get convolved dimensions
417  unsigned int conv_w = 0;
418  unsigned int conv_h = 0;
419 
420  std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width),
421  src->dimension(idx_height),
422  kernel_width,
423  kernel_height,
424  conv2d_info.conv_info,
425  conv2d_info.dilation);
426 
427  unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;
428 
429  const ITensorInfo *biases_to_use = biases;
430  bool append_bias = false;
431 
432  if(conv2d_info.num_groups != 1 && biases != nullptr)
433  {
434  // num_groups != 1 can only be for NCHW
435  // Since it is missing an utility function to reshape the biases, we append the biases into the weights tensor
436  biases_to_use = nullptr;
437  append_bias = true;
438  weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, conv2d_info.num_groups), 1, data_type);
439  }
440  else
441  {
442  weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, conv2d_info.num_groups), 1, data_type);
443  }
444 
445  weights_to_use = &weights_reshaped_info;
446 
447  if(!skip_im2col)
448  {
449  const Size2D kernel_dims(kernel_width, kernel_height);
450 
451  // Output tensor auto initialization if not yet initialized
452  TensorShape expected_output_shape = compute_im2col_conv_shape(src, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups == 1, conv2d_info.num_groups);
453 
454  auto_init_if_empty(im2col_reshaped_info, src->clone()->set_tensor_shape(expected_output_shape));
455 
456  ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClIm2ColKernel::validate(src, &im2col_reshaped_info, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups));
457  gemm_input_to_use = &im2col_reshaped_info;
458  }
459 
460  // Create GEMM output tensor
461  if(!skip_col2im)
462  {
463  TensorShape shape_gemm;
464 
465  shape_gemm = gemm_input_to_use->tensor_shape();
466  shape_gemm.set(0, mat_weights_cols);
467  shape_gemm.set(1, conv_w * conv_h);
468 
469  info_gemm = TensorInfo(shape_gemm, 1, data_type);
470  info_gemm.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout());
471  gemm_output_to_use = &info_gemm;
472  }
473 
474  GEMMLowpOutputStageInfo gemmlowp_output_stage;
476  gemmlowp_output_stage.gemmlowp_offset = 0;
477  gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel;
478 
479  if(is_quantized)
480  {
481  const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
482  const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
// Fall back to the input quantization info when dst reports a total size of zero.
483  const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info;
484  const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
485 
486  gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
487  gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
// NOTE(review): the first line of the call that fills the two vectors is not visible in this listing.
489  gemmlowp_output_stage.gemmlowp_multipliers.data(),
490  gemmlowp_output_stage.gemmlowp_shifts.data());
491  gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
492  gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0];
493 
// NOTE(review): the bounds start at 0 here, whereas configure() starts from get_min_max(dst->data_type());
// the two functions therefore pass different bounds to the GEMM when no supported activation is enabled.
494  int min_activation = 0;
495  int max_activation = 0;
496 
497  const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
498  ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
499  ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
500  };
501 
502  if(conv2d_info.act_info.enabled())
503  {
504  if(supported_acts.count(conv2d_info.act_info.activation()) != 0)
505  {
506  std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info);
507  }
508  else
509  {
510  fuse_activation = false;
511  }
512  }
513 
514  // Set the GEMMLowp output stage info
515  gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
516  gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
517  gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
518  }
519 
520  // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
521  const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
522 
523  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info,
524  conv2d_info.post_ops));
525 
526  // Validate Col2Im
527  if(!skip_col2im)
528  {
529  ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClCol2ImKernel::validate(gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups));
530  }
531 
532  // Validate Activation Layer
533  // Disable running (thus validation) of activation kernel if post ops are used
// NOTE(review): the body of this branch is not visible in this listing.
534  if(!fuse_activation && !use_post_ops)
535  {
537  }
538 
539  return Status{};
540 }
541 
// Body of the run step (the signature line is not visible in this listing).
// Calls prepare(), then enqueues im2col (unless skipped), runs the GEMM or GEMMLowp function on a pack
// built from the caller's tensors, enqueues col2im (unless skipped) and finally the standalone
// activation kernel when the activation was not fused and no post ops are used.
543 {
544  prepare(tensors);
545 
546  auto src = tensors.get_const_tensor(ACL_SRC_0);
547  auto biases = tensors.get_const_tensor(ACL_SRC_2);
548  auto dst = tensors.get_tensor(ACL_DST);
549  auto gemm_input_to_use = src;
550  auto gemm_output_to_use = dst;
551 
// Handlers for the three auxiliary tensors, looked up in the pack by their offset_int_vec() ids.
552  CLAuxTensorHandler im2col_output(offset_int_vec(Im2ColOutput), _im2col_output, tensors, false);
553  CLAuxTensorHandler gemm_output(offset_int_vec(GemmOutput), _gemm_output, tensors, false);
554  CLAuxTensorHandler weights_reshaped(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors, false);
555 
556  // Run im2col
557  if(!_skip_im2col)
558  {
// NOTE(review): the source entry of this pack is not visible in this listing.
559  ITensorPack pack =
560  {
562  { TensorType::ACL_DST, im2col_output.get() }
563  };
564  CLScheduler::get().enqueue_op(*_im2col_kernel, pack, false);
565  gemm_input_to_use = im2col_output.get();
566  }
567  if(!_skip_col2im)
568  {
569  gemm_output_to_use = gemm_output.get();
570  }
// Start from a copy of the caller's pack and override the GEMM inputs and output.
571  ITensorPack pack_mm = tensors;
572  pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use);
573  pack_mm.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
// When the biases were appended to the weights, they are not passed to the GEMM separately.
574  if(!_append_bias)
575  {
576  pack_mm.add_const_tensor(TensorType::ACL_SRC_2, biases);
577  }
578  pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use);
579  // Runs ClGemm or ClGemmLowpMatrixMultiplyCore functions
580  if(_is_quantized)
581  {
582  // Run gemmlowp
583  _mm_gemmlowp->run(pack_mm);
584  }
585  else
586  {
587  // Run gemm
588  _mm_gemm->run(pack_mm);
589  }
590 
591  // Reshape output matrix
592  if(!_skip_col2im)
593  {
// NOTE(review): the destination entry of this pack is not visible in this listing.
594  ITensorPack pack =
595  {
596  { TensorType::ACL_SRC, gemm_output_to_use },
598  };
599  CLScheduler::get().enqueue_op(*_col2im_kernel.get(), pack, false);
600  }
601 
602  // Run Activation Layer if we cannot fuse in GEMM
603  // Disable running of activation kernel if post ops are used
604  if(!_fuse_activation && !_use_post_ops)
605  {
// NOTE(review): the entries of this pack are not visible in this listing.
606  ITensorPack pack =
607  {
610  };
611  CLScheduler::get().enqueue_op(*_activation_kernel.get(), pack, false);
612  }
613 }
614 
// Body of the prepare step (the signature line is not visible in this listing).
// While _is_prepared is false: runs the weights-reshape kernel into the WeightsReshaped auxiliary
// tensor, replaces ACL_SRC_1 in the caller's pack with the reshaped weights, calls prepare() on the
// configured GEMM function and then sets _is_prepared. Later calls do nothing.
616 {
617  if(!_is_prepared)
618  {
619  // Run weights reshaping and mark original weights tensor as unused
620  ICLTensor *weights_reshaped_p = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(WeightsReshaped)));
621  CLAuxTensorHandler weights_reshaped(_weights_reshaped, *weights_reshaped_p);
622  auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
623  ITensorPack pack =
624  {
625  { TensorType::ACL_SRC, weights },
626  { TensorType::ACL_DST, weights_reshaped.get() }
627  };
628 
629  if(_append_bias)
630  {
// NOTE(review): the statement that uses biases is not visible in this listing.
631  const auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
633  }
634  CLScheduler::get().enqueue_op(*_weights_reshape_kernel.get(), pack, true);
// From here on the caller's pack carries the reshaped weights under ACL_SRC_1.
635  tensors.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get());
636 
637  // Prepare GEMM
638  _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
639  _is_prepared = true;
640  }
641 }
// Body of the workspace accessor (the signature line is not visible in this listing).
// Returns the auxiliary memory requirements collected in _aux_mem.
643 {
644  return _aux_mem;
645 }
646 } // namespace opencl
647 } // namespace arm_compute
arm_compute::DataLayout::NCHW
@ NCHW
Num samples, channels, height, width.
Cast.h
arm_compute::opencl::ClGemmConv2d::~ClGemmConv2d
~ClGemmConv2d()
Default destructor.
arm_compute::experimental::MemoryRequirements
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:134
arm_compute::WeightsInfo
Convolution Layer Weights Information class.
Definition: Types.h:1611
arm_compute::UniformQuantizationInfo::offset
int32_t offset
Definition: QuantizationInfo.h:64
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
ICLTensor.h
arm_compute::ITensorInfo::tensor_shape
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
arm_compute::QuantizationInfo
Quantization information.
Definition: QuantizationInfo.h:68
arm_compute::PixelValue
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:35
arm_compute::test::validation::idx_height
const int idx_height
Definition: Scale.cpp:263
arm_compute::DataLayout
DataLayout
[DataLayout enum definition]
Definition: CoreTypes.h:109
arm_compute::DataLayoutDimension::CHANNEL
@ CHANNEL
channel
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_multiplier
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Definition: GEMMInfo.h:49
arm_compute::GEMMLowpOutputStageInfo
GEMMLowp output stage info.
Definition: GEMMInfo.h:45
ClActivationKernel.h
arm_compute::DataLayout::NHWC
@ NHWC
Num samples, height, width, channels.
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_offset
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
Definition: GEMMInfo.h:48
arm_compute::test::validation::weights_info
weights_info
Definition: BatchNormalizationLayer.cpp:165
arm_compute::DataType::QASYMM8
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::GEMMLowpOutputStageInfo::type
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Definition: GEMMInfo.h:47
arm_compute::get_quantized_activation_min_max
std::pair< int32_t, int32_t > get_quantized_activation_min_max(const ActivationLayerInfo &act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
Definition: Utils.cpp:417
arm_compute::ICLTensor
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
arm_compute::scaled_dimensions
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
Definition: Utils.cpp:288
arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
arm_compute::GEMMLowpOutputStageInfo::is_quantized_per_channel
bool is_quantized_per_channel
GEMMLowp quantized per-channel flag.
Definition: GEMMInfo.h:56
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_shifts
std::vector< int32_t > gemmlowp_shifts
GEMMLowp output stage shifts used for quantizing to QASYMM8.
Definition: GEMMInfo.h:54
TensorInfo.h
arm_compute::ITensorPack::add_tensor
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
Definition: ITensorPack.cpp:39
arm_compute::GEMMInfo
GEMM information class.
Definition: GEMMInfo.h:64
arm_compute::ITensorPack::get_tensor
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
arm_compute::opencl::ClGemmConv2d::ClGemmConv2d
ClGemmConv2d()
Constructor.
Definition: ClGemmConv2d.cpp:55
arm_compute::test::validation::idx_width
const int idx_width
Definition: Scale.cpp:262
arm_compute::ACL_SRC_0
@ ACL_SRC_0
Definition: Types.h:45
arm_compute::ActivationLayerInfo::activation
ActivationFunction activation() const
Get the type of activation function.
Definition: ActivationLayerInfo.h:76
arm_compute::UniformQuantizationInfo
Quantization info when assuming per layer quantization.
Definition: QuantizationInfo.h:41
arm_compute::ACL_SRC_1
@ ACL_SRC_1
Definition: Types.h:46
arm_compute::ITensorPack::add_const_tensor
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
Definition: ITensorPack.cpp:49
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:630
arm_compute::DataLayoutDimension::WIDTH
@ WIDTH
width
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:877
arm_compute::ACL_SRC_2
@ ACL_SRC_2
Definition: Types.h:47
arm_compute::Conv2dInfo
Descriptor used by the 2d Convolution function.
Definition: FunctionDescriptors.h:57
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_max_bound
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8.
Definition: GEMMInfo.h:52
arm_compute::test::validation::data_layout
const auto data_layout
Definition: ConvolutionLayer.cpp:406
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
arm_compute::ITensorInfo::dimension
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
arm_compute::ActivationLayerInfo
Activation Layer Information class.
Definition: ActivationLayerInfo.h:55
arm_compute::misc::shape_calculator::compute_im2col_conv_shape
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1, unsigned int input_pad_right=0)
Calculate the im2col output shape of a tensor.
Definition: ShapeCalculator.h:525
arm_compute::test::validation::act_info
act_info
Definition: DirectConvolutionLayer.cpp:547
arm_compute::Conv2dInfo::dilation
Size2D dilation
Definition: FunctionDescriptors.h:73
arm_compute::Conv2dInfo::conv_info
PadStrideInfo conv_info
Definition: FunctionDescriptors.h:72
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
arm_compute::experimental::MemoryInfo
Definition: Types.h:96
ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:467
arm_compute::TensorInfo::set_quantization_info
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
Definition: TensorInfo.cpp:380
arm_compute::ITensorPack::get_const_tensor
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
arm_compute::quantization::compute_quantized_multipliers_and_shifts
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
Definition: AsymmHelpers.cpp:217
arm_compute::opencl::ClGemmConv2d::run
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Definition: ClGemmConv2d.cpp:542
ClWeightsReshapeKernel.h
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:456
arm_compute::ActivationLayerInfo::enabled
bool enabled() const
Check if initialised.
Definition: ActivationLayerInfo.h:91
arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:39
arm_compute::Conv2dInfo::num_groups
unsigned int num_groups
Definition: FunctionDescriptors.h:76
arm_compute::opencl::ClGemmConv2d::workspace
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
Definition: ClGemmConv2d.cpp:642
arm_compute::test::validation::gemm_info
gemm_info
Definition: GEMMMatrixMultiplyReshaped.cpp:862
arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:204
arm_compute::DataLayoutDimension::HEIGHT
@ HEIGHT
height
ARM_COMPUTE_ERROR_ON_MSG
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:457
ARM_COMPUTE_RETURN_ERROR_ON
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:297
arm_compute::TensorInfo::total_size
size_t total_size() const override
Returns the total size of the tensor in bytes.
Definition: TensorInfo.h:251
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: AutoConfiguration.h:43
arm_compute::test::validation::post_ops
experimental::PostOpList< ITensorInfo * > post_ops
Definition: ConvolutionLayer.cpp:413
arm_compute::Status
Status class.
Definition: Error.h:52
arm_compute::DataType::QASYMM8_SIGNED
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
CLScheduler.h
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
arm_compute::ITensorInfo::data_type
virtual DataType data_type() const =0
Data type used for each element of the tensor.
arm_compute::is_data_type_quantized_per_channel
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: DataTypeUtils.h:401
ClAuxTensorHandler.h
arm_compute::Conv2dInfo::act_info
ActivationLayerInfo act_info
Definition: FunctionDescriptors.h:74
arm_compute::Conv2dInfo::post_ops
experimental::PostOpList< ITensorInfo * > post_ops
Definition: FunctionDescriptors.h:77
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
Definition: Validate.h:579
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_multipliers
std::vector< int32_t > gemmlowp_multipliers
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Definition: GEMMInfo.h:53
arm_compute::QuantizationInfo::uniform
UniformQuantizationInfo uniform() const
Return per layer quantization info.
Definition: QuantizationInfo.h:147
arm_compute::CLScheduler::tune_kernel_static
void tune_kernel_static(ICLKernel &kernel)
Tunes OpenCL kernel.
Definition: CLScheduler.cpp:82
arm_compute::opencl::kernels::ClActivationKernel::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
Definition: ClActivationKernel.cpp:227
arm_compute::opencl::ClGemmConv2d::configure
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Set the input and output tensors.
Definition: ClGemmConv2d.cpp:162
arm_compute::test::validation::pack
ITensorPack pack
Definition: Im2Col.cpp:188
Size2D.h
PixelValue.h
arm_compute::opencl::CLAuxTensorHandler
Definition: ClAuxTensorHandler.h:39
arm_compute::opencl::kernels::ClIm2ColKernel::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
Definition: ClIm2ColKernel.cpp:356
arm_compute::CLScheduler::get
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:103
AutoConfiguration.h
arm_compute::test::validation::data_type
data_type
Definition: Cast.cpp:223
arm_compute::opencl::ClGemmLowpMatrixMultiplyCore::validate
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.
Definition: ClGemmLowpMatrixMultiplyCore.cpp:472
arm_compute::ITensorInfo::quantization_info
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
arm_compute::opencl::ClGemmConv2d::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Static function to check if given info will lead to a valid configuration.
Definition: ClGemmConv2d.cpp:349
ClGemm.h
AsymmHelpers.h
MemoryHelpers.h
Utils.h
arm_compute::get_data_layout_dimension_index
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:203
arm_compute::ActivationLayerInfo::a
float a() const
Get the alpha value.
Definition: ActivationLayerInfo.h:81
ShapeCalculator.h
arm_compute::TensorInfo
Store the tensor's metadata.
Definition: TensorInfo.h:42
arm_compute::UniformQuantizationInfo::scale
float scale
Definition: QuantizationInfo.h:63
arm_compute::offset_int_vec
int offset_int_vec(int offset)
Definition: MemoryHelpers.h:38
arm_compute::misc::shape_calculator::compute_weights_reshaped_shape
TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias=false, unsigned int num_groups=1)
Calculate the reshaped shape of the weights.
Definition: ShapeCalculator.h:151
arm_compute::ACL_BIAS
@ ACL_BIAS
Definition: Types.h:74
ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:245
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
ClGemmLowpMatrixMultiplyCore.h
arm_compute::opencl::kernels::ClCol2ImKernel::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
Definition: ClCol2ImKernel.cpp:135
arm_compute::DataType::F16
@ F16
16-bit floating-point number
ClIm2ColKernel.h
arm_compute::DataType::S32
@ S32
signed 32-bit number
Log.h
arm_compute::is_data_type_quantized_asymmetric
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: DataTypeUtils.h:346
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_shift
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
Definition: GEMMInfo.h:50
arm_compute::CLScheduler::enqueue_op
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
Definition: CLScheduler.cpp:211
arm_compute::GEMMLowpOutputStageInfo::gemmlowp_min_bound
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8.
Definition: GEMMInfo.h:51
arm_compute::opencl::CLAuxTensorHandler::get
ICLTensor * get()
Definition: ClAuxTensorHandler.h:94
arm_compute::ACL_SRC
@ ACL_SRC
Definition: Types.h:44
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:43
arm_compute::DataLayoutDimension::BATCHES
@ BATCHES
batches
arm_compute::ITensorInfo::set_data_layout
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
arm_compute::DataType::F32
@ F32
32-bit floating-point number
arm_compute::get_min_max
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
Definition: DataTypeUtils.h:195
arm_compute::TensorShape::set
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
arm_compute::ActivationLayerInfo::b
float b() const
Get the beta value.
Definition: ActivationLayerInfo.h:86
arm_compute::opencl::ClGemm::validate
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
Definition: ClGemm.cpp:612
ARM_COMPUTE_LOG_PARAMS
#define ARM_COMPUTE_LOG_PARAMS(...)
Definition: Log.h:35
Validate.h
arm_compute::GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT
@ QUANTIZE_DOWN_FIXEDPOINT
Quantize using a fixed point multiplication.
arm_compute::DataType
DataType
Available data types.
Definition: CoreTypes.h:82
ClGemmConv2d.h
arm_compute::TensorInfo::tensor_shape
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
Definition: TensorInfo.h:235
arm_compute::ITensorInfo::set_quantization_info
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
ClCol2ImKernel.h
arm_compute::ITensorInfo::num_dimensions
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
arm_compute::PadStrideInfo::stride
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: CoreTypes.h:186
arm_compute::opencl::ClGemmConv2d::prepare
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Definition: ClGemmConv2d.cpp:615
arm_compute::experimental::PostOpList
A sequence of PostOps that can be appended to the end of other operators.
Definition: IPostOp.h:119