Compute Library
 21.02
CLFullyConnectedLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
44 #include "support/Cast.h"
45 
46 #include <algorithm>
47 
48 namespace arm_compute
49 {
51 using namespace arm_compute::utils::cast;
52 
53 namespace
54 {
55 Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output,
56  GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
57 {
58  gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
59  gemmlowp_output_stage.gemmlowp_offset = 0;
60  gemmlowp_output_stage.gemmlowp_multiplier = 0;
61  gemmlowp_output_stage.gemmlowp_shift = 0;
62 
63  const auto data_type = input.data_type();
64 
65  // Configure output stage for quantized case
67  {
68  const QuantizationInfo oq_info = output.quantization_info();
69  const UniformQuantizationInfo iq_unif = input.quantization_info().uniform();
70  const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
71  const UniformQuantizationInfo oq_unif = oq_info.uniform();
72 
73  const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
74 
75  const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
76  int output_multiplier = 0;
77  int output_shift = 0;
78  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
79 
80  PixelValue type_min{};
81  PixelValue type_max{};
82  std::tie(type_min, type_max) = get_min_max(data_type);
83 
84  if(activation_info.enabled())
85  {
86  std::tie(type_min, type_max) = get_quantized_activation_min_max(activation_info, data_type, output_quant_info);
87  }
88 
89  // Set the GEMMLowp output stage info
90  gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
91  gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
92  gemmlowp_output_stage.gemmlowp_shift = output_shift;
93  gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
94  gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
95  type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
96  type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
97  }
98 
99  return Status{};
100 }
101 
102 Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
103 {
104  GEMMLowpOutputStageInfo gemmlowp_output_stage;
105  ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info));
106 
107  const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
108  false, // is_b_reshaped
109  true, // reshape_b_only_on_first_run
110  0, // depth_output_gemm3d
111  false, // reinterpret_input_as_3d
112  fc_info.retain_internal_weights, // retain_internal_weights
113  gemmlowp_output_stage, // gemmlowp_output_stage
114  fc_info.fp_mixed_precision, // fp_mixed_precision
115  true, // broadcast_bias
116  ActivationLayerInfo()); // activation_info
117 
118  if(is_data_type_quantized_asymmetric(input.data_type()))
119  {
120  const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
121  const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
122 
123  // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
124  // Extract and negate input and weights offset
125  const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
126  const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
127 
128  // Validate gemmlowp function
129  ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
130  &weights.clone()->set_quantization_info(weights_quantization_info),
131  bias,
132  &output,
133  gemm_info));
134  }
135  else
136  {
137  ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
138  }
139 
140  return Status{};
141 }
142 } // namespace
143 
145 {
146  configure(CLKernelLibrary::get().get_compile_context(), input, output);
147 }
148 
149 void CLFullyConnectedLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
150 {
151  auto k = std::make_unique<CLTransposeKernel>();
152  k->configure(compile_context, input, output);
153  _kernel = std::move(k);
154 }
155 
157 {
158  return CLTransposeKernel::validate(input, output);
159 }
160 
/** Constructor.
 *
 * Initializes all sub-functions and intermediate tensors; the boolean state flags
 * start as "nothing to do" (weights already reshaped/converted) and are overwritten
 * by configure().
 *
 * @param[in] memory_manager  (Optional) Memory manager shared with the GEMM functions.
 * @param[in] weights_manager (Optional) Weights manager used to share transformed weights.
 */
CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), _reshape_weights_function(),
      _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
      _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
/** Configure the matrix-multiply stage (GEMM or GEMMLowp).
 *
 * For quantized tensors the input/weights offsets are temporarily negated before
 * configuring GEMMLowp and restored afterwards — the negate/configure/restore
 * order below is deliberate and must not be reordered, as the tensors may be
 * shared with other layers.
 *
 * @param[in]  compile_context Compile context to build the OpenCL kernels with.
 * @param[in]  input           Input (possibly flattened) tensor.
 * @param[in]  weights         Reshaped/converted weights tensor.
 * @param[in]  bias            Optional bias tensor (may be nullptr).
 * @param[out] output          Output tensor.
 * @param[in]  fc_info         Fully connected layer descriptor.
 */
void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
                                         const FullyConnectedLayerInfo &fc_info)
{
    GEMMLowpOutputStageInfo gemmlowp_output_stage;
    construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info);

    const GEMMInfo &gemm_info = GEMMInfo(false,                           // is_a_reshaped
                                         false,                           // is_b_reshaped
                                         true,                            // reshape_b_only_on_first_run
                                         0,                               // depth_output_gemm3d
                                         false,                           // reinterpret_input_as_3d
                                         fc_info.retain_internal_weights, // retain_internal_weights
                                         gemmlowp_output_stage,           // gemmlowp_output_stage
                                         fc_info.fp_mixed_precision,      // fp_mixed_precision
                                         true,                            // broadcast_bias
                                         fc_info.activation_info);        // activation_info

    if(_is_quantized)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Configure gemmlowp function
        _mm_gemmlowp.configure(compile_context, input, weights, bias, output, gemm_info);

        // Revert back QuantizatioInfo as input and weights could be used in other fully connected layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply kernel
        _mm_gemm.configure(compile_context, input, weights, bias, output, 1.f, 1.f, gemm_info);
    }
}
207 
208 void CLFullyConnectedLayer::configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
209  const FullyConnectedLayerInfo &fc_info)
210 {
211  ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
212 
213  // If the fully connected layer is called after a convolution layer, the input tensor must be linearized
214 
215  // Initialize output tensor for flatten
216  TensorShape shape_flatten = compute_flatten_shape(input->info());
217  _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten).set_data_layout(DataLayout::NCHW));
218 
219  // Configure flatten kernel
220  _memory_group.manage(&_flatten_output);
221  _flatten_layer.configure(compile_context, input, &_flatten_output);
222 
223  // Configure matrix multiply kernel
224  configure_mm(compile_context, &_flatten_output, weights, bias, output, fc_info);
225 
226  // Allocate the output tensor for flatten once all the configure methods have been called
227  _flatten_output.allocator()->allocate();
228 }
229 
230 void CLFullyConnectedLayer::configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
231  const FullyConnectedLayerInfo &fc_info)
232 {
233  ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
234 
235  // Configure matrix multiply kernel
236  configure_mm(compile_context, input, weights, bias, output, fc_info);
237 }
238 
239 void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
240  FullyConnectedLayerInfo fc_info)
241 {
242  configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, fc_info);
243 }
244 
245 void CLFullyConnectedLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
246  FullyConnectedLayerInfo fc_info)
247 {
248  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
249 
250  // Perform validate step
252  weights->info(),
253  biases != nullptr ? biases->info() : nullptr,
254  output->info(),
255  fc_info));
256 
257  _are_weights_converted = true;
258  _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
259  _is_fc_after_conv = true;
260  _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
261  _is_prepared = fc_info.retain_internal_weights;
262  _original_weights = weights;
263 
264  if(_weights_manager)
265  {
266  _weights_manager->manage(weights);
267  }
268 
269  const ICLTensor *weights_to_use = weights;
270 
271  // With the Fully Connected layer we can have 4 different cases:
272  // 1) Convolution layer -> Fully Connected layer without batches
273  // 2) Fully Connected layer -> Fully Connected layer without batches
274  // 3) Convolution layer -> Fully Connected layer with batches
275  // 4) Fully Connected layer -> Fully Connected layer with batches
276 
277  // Check if we have a fully connected layer with batches
278  const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
279  if(is_batched_fc_layer)
280  {
281  _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
282  input->info()->tensor_shape().cend(),
283  output->info()->tensor_shape().cbegin() + 1));
284  }
285  else
286  {
287  _is_fc_after_conv = input->info()->num_dimensions() > 1;
288  }
289 
290  // Reshape weights if needed
291  if(!_are_weights_reshaped)
292  {
293  if(_weights_manager && _weights_manager->are_weights_managed(weights))
294  {
295  _reshape_weights_managed_function.configure(compile_context, weights);
296  weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed_function));
297  }
298  else
299  {
300  // Reshape the weights
301  _reshape_weights_function.configure(compile_context, weights, &_reshape_weights_output);
302  weights_to_use = &_reshape_weights_output;
303  }
304  }
305 
306  // Convert weights if needed
307  if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
308  {
309  if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
310  {
311  _convert_weights_managed.configure(compile_context, weights_to_use,
312  input->info()->tensor_shape(),
313  fc_info.weights_trained_layout);
314  weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_convert_weights_managed));
315  }
316  else
317  {
318  // Convert weights
319  _convert_weights.configure(compile_context, weights_to_use,
320  &_converted_weights_output,
321  input->info()->tensor_shape(),
322  fc_info.weights_trained_layout);
323 
324  weights_to_use = &_converted_weights_output;
325  }
326  _are_weights_converted = false;
327  }
328 
329  if(_is_fc_after_conv)
330  {
331  // Fully Connected layer after a Convolution Layer without batches
332  configure_conv_fc(compile_context, input, weights_to_use, biases, output, fc_info);
333  }
334  else
335  {
336  // Fully Connected layer after a Fully Connected Layer without batches
337  configure_fc_fc(compile_context, input, weights_to_use, biases, output, fc_info);
338  }
339 }
340 
341 Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
342  FullyConnectedLayerInfo fc_info)
343 {
344  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
350 
351  bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
352  bool is_fc_after_conv = true;
353 
354  const ITensorInfo &flatten_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)).set_data_layout(DataLayout::NCHW));
355  const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
356  const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
357 
358  // With the Fully Connected layer we can have 4 different cases:
359  // 1) Convolution layer -> Fully Connected layer without batches
360  // 2) Fully Connected layer -> Fully Connected layer without batches
361  // 3) Convolution layer -> Fully Connected layer with batches
362  // 4) Fully Connected layer -> Fully Connected layer with batches
363 
364  const ITensorInfo *input_to_use = input;
365  const ITensorInfo *weights_to_use = weights;
366 
367  // Check if we have a fully connected layer with batches
368  const bool is_batched_fc_layer = output->dimension(1) > 1;
369  if(is_batched_fc_layer)
370  {
371  is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
372  input->tensor_shape().cend(),
373  output->tensor_shape().cbegin() + 1));
374  }
375  else
376  {
377  is_fc_after_conv = input->num_dimensions() > 1;
378  }
379 
380  if(!weights_reshaped)
381  {
382  // Validate reshape weights kernel
384  weights_to_use = &reshaped_weights;
385  }
386 
387  if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
388  {
389  // Validate convert weights kernel
391  &converted_weights,
392  input->tensor_shape(),
393  fc_info.weights_trained_layout));
394  weights_to_use = &converted_weights;
395  }
396 
397  if(is_fc_after_conv)
398  {
399  // Fully Connected layer after a Convolution Layer without batches
400  ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));
401 
402  // Validate flatten kernel
404  input_to_use = &flatten_input;
405  }
406  else
407  {
408  // Fully Connected layer after a Fully Connected Layer without batches
409  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
410  }
411 
412  // Validate matrix multiply kernel
413  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
414 
415  return Status{};
416 }
417 
419 {
420  prepare();
421 
422  MemoryGroupResourceScope scope_mg(_memory_group);
423 
424  // Linearize input if it comes from a convolutional layer
425  if(_is_fc_after_conv)
426  {
427  _flatten_layer.run();
428  }
429 
430  // Run matrix multiply
431  if(_is_quantized)
432  {
433  _mm_gemmlowp.run();
434  }
435  else
436  {
437  _mm_gemm.run();
438  }
439 }
440 
442 {
443  if(!_is_prepared)
444  {
445  if(!_weights_manager)
446  {
447  ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
448  }
449 
450  auto release_unused = [](CLTensor * w)
451  {
452  if(!w->is_used())
453  {
454  CLScheduler::get().queue().finish();
455  w->allocator()->free();
456  }
457  };
458 
459  // Pointer to current weights
460  const ICLTensor *cur_weights = _original_weights;
461 
462  // Reshape of the weights if needed (happens only once)
463  if(!_are_weights_reshaped)
464  {
465  if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
466  {
467  cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function));
468  }
469  else
470  {
471  // Run reshape weights kernel and mark weights as unused
472  _reshape_weights_output.allocator()->allocate();
473  _reshape_weights_function.run();
474 
475  cur_weights->mark_as_unused();
476  cur_weights = &_reshape_weights_output;
477  }
478  _are_weights_reshaped = true;
479  }
480 
481  // Convert weights if needed (happens only once)
482  if(!_are_weights_converted)
483  {
484  if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
485  {
486  _weights_manager->run(cur_weights, &_convert_weights_managed);
487  }
488  else
489  {
490  _converted_weights_output.allocator()->allocate();
491  _convert_weights.run();
492  cur_weights->mark_as_unused();
493  }
494 
495  _are_weights_converted = true;
496  }
497 
498  // Release reshaped weights if unused
499  release_unused(&_reshape_weights_output);
500 
501  // Prepare GEMM prepare and release unused weights
502  if(!_is_quantized)
503  {
504  _mm_gemm.prepare();
505  }
506 
507  // Release converted weights if unused
508  release_unused(&_reshape_weights_output);
509  release_unused(&_converted_weights_output);
510 
511  _is_prepared = true;
512  }
513 }
514 } // namespace arm_compute
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:1168
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFlattenLayer.
SimpleTensor< float > w
Definition: DFT.cpp:156
Shape of a tensor.
Definition: TensorShape.h:39
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
Definition: CLGEMM.cpp:870
CLFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
void run() override
Run the kernels contained in the function.
Definition: CLGEMM.cpp:778
bool enabled() const
Check if initialised.
Definition: Types.h:1600
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool retain_internal_weights
Retain internal reshaped weights.
Definition: Types.h:1618
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
Definition: ITensor.cpp:163
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
void configure(const ICLTensor *input)
Configures the CLFullyConnectedLayerReshapeWeights function.
Fully connected layer info.
Definition: Types.h:1613
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void manage(const ITensor *weights, ITransformWeights *parent=nullptr)
Start managing a weights tensor.
Status class.
Definition: Error.h:52
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
void prepare() override
Prepare the function for executing.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
void mark_as_unused() const
Marks a tensor as unused.
Definition: ITensor.cpp:168
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
const DataType data_type
Definition: Im2Col.cpp:150
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
void run() override final
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
Definition: Utils.cpp:483
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayer.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
Definition: Types.h:1617
void run() override
Run the kernels contained in the function.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
Definition: Types.h:1952
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's input and output.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
Definition: Types.h:1620
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Definition: CLScheduler.cpp:41
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Num samples, channels, height, width.
CLCompileContext class.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration of CLConvertFullyConnectedW...
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1190
__constant DATA_TYPE16 type_min
Definition: minmaxloc.cl:46
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array...
Definition: Dimensions.h:255
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Definition: Dimensions.h:231
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
Definition: CLGEMM.cpp:666
DataLayout weights_trained_layout
Layout that the weights have been trained with.
Definition: Types.h:1615
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout)
Initialize the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
bool fp_mixed_precision
Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
Definition: Types.h:1619
void configure(const ICLTensor *input, ICLTensor *output)
Set the input and output tensors.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
Definition: CLGEMM.cpp:727
void configure(const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
Configures the CLConvertFullyConnectedWeights function.
__constant DATA_TYPE16 type_max
Definition: minmaxloc.cl:47
bool transpose_weights
Transpose weights if true.
Definition: Types.h:1616
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor&#39;s metadata.
Definition: TensorInfo.h:45
GEMM information class.
Definition: Types.h:2003
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
Definition: Types.h:1585
quantized, asymmetric fixed-point 8-bit number signed
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
Definition: Dimensions.h:46
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayerRes...
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the mininum and maximum values a data type can take.
Definition: Utils.h:564
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTransposeKernel.
Basic implementation of the OpenCL tensor interface.
Definition: CLTensor.h:41