Compute Library 21.08: CLGEMMDeconvolutionLayer.cpp (file documentation)
/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"

#include <tuple>

namespace arm_compute
{
namespace
{
std::pair<Coordinates, Coordinates> compute_start_end_slice_coordinates(const ITensorInfo &output_info, const PadStrideInfo &deconv_info, bool is_nchw)
{
    Coordinates start;
    Coordinates end;

    if(is_nchw)
    {
        start.set(0, deconv_info.pad_left());
        start.set(1, deconv_info.pad_top());
        end.set(0, output_info.dimension(0) - deconv_info.pad_right());
        end.set(1, output_info.dimension(1) - deconv_info.pad_bottom());
    }
    else
    {
        start.set(0, 0);
        start.set(1, deconv_info.pad_left());
        start.set(2, deconv_info.pad_top());

        end.set(0, output_info.dimension(0));
        end.set(1, output_info.dimension(1) - deconv_info.pad_right());
        end.set(2, output_info.dimension(2) - deconv_info.pad_bottom());
    }

    return { start, end };
}
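
// Illustrative example (values are assumptions, not from this file): for an NHWC
// output whose first three dimensions are (C, W, H) = (16, 8, 8) and a deconv_info
// with a padding of 1 on every side, the helper above returns start = (0, 1, 1) and
// end = (16, 7, 7): the full channel range, with one element cropped from each
// spatial border.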
Status construct_gemmlowp_output_stage(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, GEMMLowpOutputStageInfo &output_stage_info)
{
    const auto data_type = input->data_type();

    if(is_data_type_quantized_asymmetric(data_type))
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

        float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
        int   output_multiplier(0);
        int   output_shift(0);
        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

        output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        output_stage_info.gemmlowp_multiplier = output_multiplier;
        output_stage_info.gemmlowp_shift      = output_shift;
        output_stage_info.gemmlowp_offset     = oq_info.offset;
        const auto min_max_bound              = get_min_max(data_type);
        output_stage_info.gemmlowp_min_bound  = (std::get<0>(min_max_bound)).get<int32_t>();
        output_stage_info.gemmlowp_max_bound  = (std::get<1>(min_max_bound)).get<int32_t>();
        output_stage_info.output_data_type    = data_type;
    }
    return Status{};
}
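
// Illustrative example (values are assumptions): if iq_info.scale = 0.5f,
// wq_info.scale = 0.7f and oq_info.scale = 1.0f, then multiplier = 0.35. Since
// 0.35 = 0.7 * 2^-1 and round(0.7 * 2^31) = 1503238554,
// calculate_quantized_multiplier() yields output_multiplier = 1503238554 and
// output_shift = 1, i.e. multiplier ~= output_multiplier * 2^-31 * 2^-output_shift.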

} // namespace

CLGEMMDeconvolutionLayer::CLGEMMDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
    : _memory_group(std::move(memory_manager)),
      _mm_gemm(),
      _mm_gemmlowp(),
      _gemmlowp_output_stage(),
      _permute_input_to_nhwc(),
      _permute_weights_to_nhwc(),
      _reshape_weights(),
      _transpose_weights(),
      _deconv_reshape(std::make_unique<CLDeconvolutionReshapeOutputKernel>()),
      _slice_gemm(),
      _gemmlowp_final(),
      _reshaped_weights(),
      _reshaped_weights_t(),
      _permuted_input(),
      _permuted_weights(),
      _gemm_output(),
      _slice_gemm_input(),
      _original_weights(),
      _is_prepared(false),
      _padded_input(false),
      _is_nchw(false),
      _is_quantized(false)
{
}

CLGEMMDeconvolutionLayer::~CLGEMMDeconvolutionLayer() = default;

Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);

    DataLayout data_layout = input->data_layout();
    const bool padded_input = deconv_info.pad_bottom() > 0 || deconv_info.pad_left() > 0 || deconv_info.pad_right() > 0 || deconv_info.pad_top() > 0;
    const bool is_nchw      = input->data_layout() == DataLayout::NCHW;
    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const size_t idx_b = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != deconv_info.stride().first);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) != deconv_info.stride().second);

    TensorShape nhwc_weights_shape = weights->tensor_shape();
    TensorShape nhwc_input_shape   = input->tensor_shape();

    if(is_nchw)
    {
        permute(nhwc_weights_shape, PermutationVector(2, 0, 1));
        permute(nhwc_input_shape, PermutationVector(2, 0, 1));

        TensorInfo nhwc_input_info = input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(nhwc_input_shape).set_data_layout(DataLayout::NCHW);

        TensorInfo nhwc_weights_info = weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(nhwc_weights_shape).set_data_layout(DataLayout::NCHW);

        CLPermute::validate(weights, &nhwc_weights_info, PermutationVector(2, 0, 1));
        CLPermute::validate(input, &nhwc_input_info, PermutationVector(2, 0, 1));
    }

    const TensorShape reshaped_shape = TensorShape(nhwc_weights_shape[0], nhwc_weights_shape[1] * nhwc_weights_shape[2] * nhwc_weights_shape[3]);
    const TensorInfo  reshaped_info  = weights->clone()->set_tensor_shape(reshaped_shape).set_data_layout(DataLayout::NCHW).set_is_resizable(true);
    ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(weights, &reshaped_info));

    TensorShape      transposed_shape(reshaped_shape[1], reshaped_shape[0]);
    const TensorInfo reshaped_t_info = reshaped_info.clone()->set_is_resizable(true).set_tensor_shape(transposed_shape);
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(&reshaped_info, &reshaped_t_info));

    TensorShape gemm_output_shape(weights->dimension(idx_w) * weights->dimension(idx_h) * weights->dimension(idx_b),
                                  input->dimension(idx_w),
                                  input->dimension(idx_h),
                                  input->dimension(idx_b));

    TensorInfo gemm_output_info = reshaped_t_info.clone()->set_tensor_shape(gemm_output_shape).set_is_resizable(true);
    GEMMInfo   gemm_info(false, false, true, input->dimension(idx_h), true);

    GEMMLowpOutputStageInfo output_stage_info;

    if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_tensor_shape(nhwc_input_shape), &reshaped_t_info, nullptr, &gemm_output_info.set_data_type(DataType::S32),
                                                                           gemm_info));
        ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, output_stage_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input->clone()->set_tensor_shape(nhwc_input_shape).set_is_resizable(true), &reshaped_t_info, nullptr, &gemm_output_info, 1.0f, 0.0f, gemm_info));
    }

    const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second);
    auto                out_dims           = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), stride_info);
    const TensorShape   deconv_shape       = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input, *weights);
    TensorInfo          col2im_output_info = gemm_output_info.clone()->set_tensor_shape(deconv_shape).set_is_resizable(true);

    if(padded_input && is_quantized)
    {
        const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
        ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionReshapeOutputKernel::validate(&gemm_output_info, bias, &col2im_output_info, input, weights, deconv_info));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(&col2im_output_info, nullptr, &col2im_output_info.clone()->set_is_resizable(true).set_data_type(input->data_type()), output_stage_info));
        ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(&col2im_output_info.clone()->set_is_resizable(true).set_data_type(input->data_type()), output, start_end.first, start_end.second));
    }
    else if(padded_input)
    {
        const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
        ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionReshapeOutputKernel::validate(&gemm_output_info, bias, &col2im_output_info, input, weights, deconv_info));
        ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(&col2im_output_info, output, start_end.first, start_end.second));
    }
    else if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionReshapeOutputKernel::validate(&gemm_output_info, bias, &col2im_output_info, input, weights, deconv_info));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(&col2im_output_info, nullptr, output, output_stage_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionReshapeOutputKernel::validate(&gemm_output_info, bias, output, input, weights, deconv_info));
    }

    return Status{};
}
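
// Note (illustrative): the four branches above mirror configure() below. The GEMM result
// is always reshaped by CLDeconvolutionReshapeOutputKernel; a quantized path then appends
// a CLGEMMLowpOutputStage, and a padded input appends a CLSlice that crops the rows and
// columns accounted for by the deconvolution padding.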

void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info);
}

void CLGEMMDeconvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
                                         const PadStrideInfo &deconv_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMDeconvolutionLayer::validate(input->info(),
                                                                  weights->info(),
                                                                  bias != nullptr ? bias->info() : nullptr,
                                                                  output->info(),
                                                                  deconv_info));

    _original_weights = weights;
    _padded_input     = deconv_info.pad_bottom() > 0 || deconv_info.pad_left() > 0 || deconv_info.pad_right() > 0 || deconv_info.pad_top() > 0;
    _is_nchw          = input->info()->data_layout() == DataLayout::NCHW;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());

    const ICLTensor *input_to_use   = input;
    const ICLTensor *weights_to_use = weights;

    // If the data layout is NCHW, transform everything to NHWC. An alternative would be to
    // do an outer product in NCHW and then accumulate through a reduction. That approach has
    // two drawbacks: the outer product is less efficient than a full GEMM, and the reduction
    // might be slower than the GEMM itself.
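    // Note (illustrative): Compute Library stores shapes lowest dimension first, so an
    // NCHW tensor has shape (W, H, C, N) and an NHWC tensor has shape (C, W, H, N).
    // PermutationVector(2U, 0U, 1U) moves the channel dimension to index 0, which is
    // what turns an NCHW-laid-out tensor into its NHWC equivalent below.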
    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _permute_input_to_nhwc.configure(compile_context, input, &_permuted_input, PermutationVector(2U, 0U, 1U));

        _permute_weights_to_nhwc.configure(compile_context, weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
    }

    // Reshape the input weights. The weights will be reshaped only once, during the call to prepare().
    _reshaped_weights.allocator()->init(TensorInfo(TensorShape(weights_to_use->info()->dimension(0),
                                                               weights_to_use->info()->dimension(1) * weights_to_use->info()->dimension(2) * weights_to_use->info()->dimension(3)),
                                                   1,
                                                   input->info()->data_type(), weights->info()->quantization_info()));

    _reshape_weights.configure(compile_context, weights_to_use, &_reshaped_weights);
    _transpose_weights.configure(compile_context, &_reshaped_weights, &_reshaped_weights_t);

    const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
    GEMMInfo     gemm_info(false, false, true, input->info()->dimension(idx_h), true);

    // Configure output stage for asymmetric quantized types
    if(_is_quantized)
    {
        // gemmlowp adds the offsets (instead of subtracting them), so the original
        // offsets are negated here and restored after configuration.
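        // For illustration: with real_value = scale * (quantized - offset), gemmlowp's
        // accumulation uses terms of the form (q + offset'); passing offset' = -offset
        // makes each term equal (q - offset), which is what the quantization scheme needs.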
        QuantizationInfo iq_info = input->info()->quantization_info();
        QuantizationInfo wq_info = weights->info()->quantization_info();

        input_to_use->info()->set_quantization_info(QuantizationInfo(iq_info.uniform().scale, -iq_info.uniform().offset));
        _reshaped_weights_t.info()->set_quantization_info(QuantizationInfo(wq_info.uniform().scale, -wq_info.uniform().offset));

        _mm_gemmlowp.configure(compile_context, input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, gemm_info);

        input_to_use->info()->set_quantization_info(iq_info);
        _reshaped_weights_t.info()->set_quantization_info(wq_info);
    }
    else
    {
        _mm_gemm.configure(compile_context, input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, 1.f, 0.0f, gemm_info);
    }

    if(_is_nchw)
    {
        _permuted_input.allocator()->allocate();
    }

    ICLTensor *deconv_reshape_output = nullptr;
    ICLTensor *slice_output          = nullptr;
    ICLTensor *output_stage_output   = nullptr;

    if(_padded_input && _is_quantized)
    {
        _memory_group.manage(&_slice_gemm_input);
        _memory_group.manage(&_gemmlowp_final);
        deconv_reshape_output = &_gemmlowp_final;
        output_stage_output   = &_slice_gemm_input;
        slice_output          = output;
    }
    else if(_padded_input)
    {
        _memory_group.manage(&_slice_gemm_input);
        deconv_reshape_output = &_slice_gemm_input;
        slice_output          = output;
    }
    else if(_is_quantized)
    {
        _memory_group.manage(&_gemmlowp_final);
        deconv_reshape_output = &_gemmlowp_final;
        output_stage_output   = output;
    }
    else
    {
        deconv_reshape_output = output;
    }

    // Configure a Col2Im call to reshape the output of GEMM
    _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
    _gemm_output.allocator()->allocate();

    if(_is_quantized)
    {
        GEMMLowpOutputStageInfo output_stage_info;
        construct_gemmlowp_output_stage(input->info(), weights->info(), output->info(), output_stage_info);
        _gemmlowp_output_stage.configure(compile_context, &_gemmlowp_final, nullptr, output_stage_output, output_stage_info);
        _gemmlowp_final.allocator()->allocate();
    }

    // If the input was padded, the output needs to be sliced.
    if(_padded_input)
    {
        const auto start_end = compute_start_end_slice_coordinates(*deconv_reshape_output->info(), deconv_info, _is_nchw);
        _slice_gemm.configure(compile_context, &_slice_gemm_input, slice_output, start_end.first, start_end.second);
        _slice_gemm_input.allocator()->allocate();
    }
}

void CLGEMMDeconvolutionLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_is_nchw)
    {
        _permute_input_to_nhwc.run();
    }

    if(_is_quantized)
    {
        _mm_gemmlowp.run();
    }
    else
    {
        _mm_gemm.run();
    }

    CLScheduler::get().enqueue(*_deconv_reshape, false);

    if(_is_quantized)
    {
        _gemmlowp_output_stage.run();
    }

    if(_padded_input)
    {
        _slice_gemm.run();
    }
}

void CLGEMMDeconvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        if(_is_nchw)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights_to_nhwc.run();
        }

        _reshaped_weights.allocator()->allocate();
        _reshape_weights.run();

        if(_is_nchw)
        {
            _permuted_weights.allocator()->free();
        }

        _reshaped_weights_t.allocator()->allocate();
        _transpose_weights.run();

        // Prepare gemm
        if(!_is_quantized)
        {
            _mm_gemm.prepare();
        }
        else
        {
            _mm_gemmlowp.prepare();
        }

        // Free resources
        if(!_reshaped_weights_t.is_used())
        {
            _reshaped_weights_t.allocator()->free();
        }

        _original_weights->mark_as_unused();
        _is_prepared = true;
    }
}
} // namespace arm_compute
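
A minimal usage sketch (illustrative, not part of the file above). The tensor shapes, padding values and variable names are assumptions, chosen so that the kernel size matches the stride as validate() requires; the call sequence (default_init, validate, configure, allocate, run, sync) is the standard Compute Library CL runtime pattern.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Initialise the OpenCL context, command queue and kernel library.
    CLScheduler::get().default_init();

    // Illustrative NHWC shapes (stored as (C, W, H, N)): an 8x8 input with 32
    // channels, a 2x2 kernel with 16 output filters, stride 2. A 2x2 kernel with
    // stride 2 satisfies the "kernel size == stride" check in validate().
    TensorInfo input_info(TensorShape(32U, 8U, 8U, 1U), 1, DataType::F32);
    TensorInfo weights_info(TensorShape(32U, 2U, 2U, 16U), 1, DataType::F32);
    TensorInfo output_info(TensorShape(16U, 16U, 16U, 1U), 1, DataType::F32); // (8 - 1) * 2 + 2 = 16
    input_info.set_data_layout(DataLayout::NHWC);
    weights_info.set_data_layout(DataLayout::NHWC);
    output_info.set_data_layout(DataLayout::NHWC);

    CLTensor input, weights, output;
    input.allocator()->init(input_info);
    weights.allocator()->init(weights_info);
    output.allocator()->init(output_info);

    const PadStrideInfo deconv_info(2, 2, 0, 0); // stride_x, stride_y, pad_x, pad_y

    // Validate first, then configure the function (bias is optional).
    CLGEMMDeconvolutionLayer deconv;
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMDeconvolutionLayer::validate(input.info(), weights.info(), nullptr, output.info(), deconv_info));
    deconv.configure(&input, &weights, nullptr, &output, deconv_info);

    // Allocate backing CL buffers after configuration, then execute.
    input.allocator()->allocate();
    weights.allocator()->allocate();
    output.allocator()->allocate();
    deconv.run();
    CLScheduler::get().sync();
    return 0;
}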