ArmNN
 24.08
ClBackend.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017-2024 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ClBackend.hpp"
7 #include "ClBackendContext.hpp"
9 #include "ClBackendId.hpp"
12 #include "ClLayerSupport.hpp"
14 #include "ClWorkloadFactory.hpp"
15 
17 #include <armnn/Descriptors.hpp>
18 
21 
25 
35 
36 #include <Optimizer.hpp>
37 
38 #include <arm_compute/core/Types.h>
39 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
40 
41 namespace armnn
42 {
43 
// Body of ClBackend::GetIdStatic(); the signature line (listing line 44) is absent from this
// listing. Returns a reference to a function-local static BackendId that is constructed
// from ClBackendId().
45 {
46  static const BackendId s_Id{ClBackendId()};
47  return s_Id;
48 }
49 
51 {
53  {
54  return std::make_unique<ClMemoryManager>(m_CustomAllocator);
55  }
56  return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
57 }
58 
// ClBackend::CreateWorkloadFactory(memoryManager); the first signature line (listing line 59)
// is absent from this listing. Builds a ClWorkloadFactory from the supplied memory manager
// after downcasting it to ClMemoryManager with PolymorphicPointerDowncast.
60  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
61 {
62  return std::make_unique<ClWorkloadFactory>(
63  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
64 }
65 
// NOTE(review): presumably the ModelOptions overload of ClBackend::CreateWorkloadFactory; the
// first signature line (listing line 66) is absent from this listing, so confirm against the
// header. Builds a ClWorkloadFactory from the memory manager (downcast to ClMemoryManager)
// together with the result of CreateBackendSpecificModelContext(modelOptions).
67  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
68 {
69  return std::make_unique<ClWorkloadFactory>(
70  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
71 }
72 
74  TensorHandleFactoryRegistry& registry) const
75 {
76  std::shared_ptr<ClMemoryManager> memoryManager;
78  {
79  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
80  }
81  else
82  {
83  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
84  }
85 
86  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
87  std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
89 
90  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
91  registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());
92 
93  registry.RegisterMemoryManager(memoryManager);
94  registry.RegisterFactory(std::move(factory));
95  registry.RegisterFactory(std::move(importFactory));
96 
97  return std::make_unique<ClWorkloadFactory>(
98  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
99 }
100 
102  TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
103 {
104  std::shared_ptr<ClMemoryManager> memoryManager;
106  {
107  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
108  }
109  else
110  {
111  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
112  }
113 
114  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
115  std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
117 
118  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
119  registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());
120 
121  registry.RegisterMemoryManager(memoryManager);
122  registry.RegisterFactory(std::move(factory));
123  registry.RegisterFactory(std::move(importFactory));
124 
125  return std::make_unique<ClWorkloadFactory>(
126  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
127 }
128 
130  TensorHandleFactoryRegistry& registry,
131  const ModelOptions& modelOptions,
132  MemorySourceFlags inputFlags,
133  MemorySourceFlags outputFlags) const
134 {
135  // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
136  if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
137  {
138  inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
139  }
140  if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
141  {
142  outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
143  }
144  std::shared_ptr<ClMemoryManager> memoryManager;
146  {
147  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
148  }
149  else
150  {
151  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
152  }
153 
154  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
155  std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
156  inputFlags, outputFlags);
157 
158  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
159  registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());
160 
161  registry.RegisterMemoryManager(memoryManager);
162  registry.RegisterFactory(std::move(factory));
163  registry.RegisterFactory(std::move(importFactory));
164 
165  return std::make_unique<ClWorkloadFactory>(
166  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
167 }
168 
169 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
170 {
171  return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
173 }
174 
176 {
177  std::shared_ptr<ClMemoryManager> memoryManager;
179  {
180  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
181  }
182  else
183  {
184  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
185  }
186 
187  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
188  std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
190 
191  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
192  registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());
193 
194  registry.RegisterMemoryManager(memoryManager);
195  registry.RegisterFactory(std::move(factory));
196  registry.RegisterFactory(std::move(importFactory));
197 
198 }
199 
201  MemorySourceFlags inputFlags,
202  MemorySourceFlags outputFlags)
203 {
204  // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
205  if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
206  {
207  inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
208  }
209  if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
210  {
211  outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
212  }
213  std::shared_ptr<ClMemoryManager> memoryManager;
215  {
216  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
217  }
218  else
219  {
220  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
221  }
222 
223  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
224  std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
225  inputFlags, outputFlags);
226 
227  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
228  registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());
229 
230  registry.RegisterMemoryManager(memoryManager);
231  registry.RegisterFactory(std::move(factory));
232  registry.RegisterFactory(std::move(importFactory));
233 }
234 
// Body of ClBackend::CreateBackendContext(); the signature line (listing line 235) is absent
// from this listing. Allocates a ClBackendContext from `options` and returns it wrapped in an
// IBackendContextPtr.
236 {
237  return IBackendContextPtr{new ClBackendContext{options}};
238 }
239 
242 {
244 }
245 
// NOTE(review): presumably ClBackend::CreateBackendSpecificModelContext; the first signature
// line (listing line 246) is absent from this listing, so confirm against the header.
// Allocates a ClBackendModelContext from `modelOptions` and returns it wrapped in an
// IBackendSpecificModelContextPtr.
247  const ModelOptions& modelOptions) const
248 {
249  return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
250 }
251 
253 {
254  static ILayerSupportSharedPtr layerSupport
255  {
257  };
258  return layerSupport;
259 }
260 
262 {
263  static ILayerSupportSharedPtr layerSupport
264  {
266  };
267  return layerSupport;
268 }
269 
// Returns a newly created ClBackendDefaultAllocator, handed back to the caller as a
// std::unique_ptr<ICustomAllocator>. A fresh allocator object is created on every call.
270 std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
271 {
272  return std::make_unique<ClBackendDefaultAllocator>();
273 }
274 
// Body of ClBackend::GetCapabilities(); the signature line (listing line 275) is absent from
// this listing. Builds and returns a BackendCapabilities object labelled "GpuAcc" holding the
// name/bool pairs listed below. All values are literals except "HasFp16", which is read from
// arm_compute::CLKernelLibrary::get().fp16_supported() each time this function is called.
276 {
277  // add new capabilities here..
278  return BackendCapabilities ("GpuAcc",
279  {
280  {"NonConstWeights", true},
281  {"AsyncExecution", false},
282  {"ProtectedContentAllocation", true},
283  {"ConstantTensorsAsInputs", true},
284  {"PreImportIOTensors", false},
285  {"ExternallyManagedMemory", true},
286  {"MultiAxisPacking", false},
287  {"SingleAxisPacking", true},
288  {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
289  });
290 }
291 
293  const ModelOptions& modelOptions) const
294 {
295  OptimizationViews optimizationViews(modelOptions);
296 
297  auto it = subgraph.end();
298  bool isFastMathEnabled = false;
299  std::map<LayerGuid, Layer*> untouched;
300 
301  while (it != subgraph.begin())
302  {
303  --it;
304  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
305  untouched.insert({base.GetGuid(), &base});
306  }
307 
308  it = subgraph.end();
309 #if defined(ARMCOMPUTECL_ENABLED)
311 
312  if (modelContextPtr)
313  {
314  auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
315  if (clModelOptions)
316  {
317  isFastMathEnabled = clModelOptions->IsFastMathEnabled();
318  }
319  }
320 #endif
321  while (it != subgraph.begin())
322  {
323  --it;
324  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
325 
326  // Fuse activation into previous layer if supported by backend
332  && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
333  {
334  for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
335  {
336  if (output->GetNumConnections() == 1)
337  {
338  for (auto&& childInput : output->GetConnections())
339  {
340  if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
341  (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
342  {
343  Layer& child = childInput->GetOwningLayer();
344 
345  auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
346 
347  const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
348  base.GetName();
349 
350  // Get params from activation layer
351  ActivationDescriptor activationDesc = activationLayer->GetParameters();
352 
353  if (base.GetType() == LayerType::Convolution2d)
354  {
355  Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
356 
357  Optional<TensorInfo> biases;
358 
359  if (baseLayer->GetParameters().m_BiasEnabled)
360  {
361  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
362  }
363 
366  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
367  baseLayer->GetParameters(),
369  biases,
370  isFastMathEnabled,
371  &activationDesc);
372 
373  if (status)
374  {
375  FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
376  baseLayer,
377  activationLayer,
378  activationDesc,
379  name);
380  untouched.erase(baseLayer->GetGuid());
381  untouched.erase(activationLayer->GetGuid());
382  }
383  }
384  else if (base.GetType() == LayerType::DepthwiseConvolution2d)
385  {
386  DepthwiseConvolution2dLayer* baseLayer =
387  PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
388 
389  Optional<TensorInfo> biases;
390 
391  if (baseLayer->GetParameters().m_BiasEnabled)
392  {
393  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
394  }
395 
398  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
399  baseLayer->GetParameters(),
401  biases,
402  &activationDesc);
403 
404  if (status)
405  {
406  FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
407  baseLayer,
408  activationLayer,
409  activationDesc,
410  name);
411  untouched.erase(baseLayer->GetGuid());
412  untouched.erase(activationLayer->GetGuid());
413  }
414  }
415  else if (base.GetType() == LayerType::FullyConnected)
416  {
417  FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
418  FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
419 
420  // As bias is optional only try to get TensorInfo from input if bias is enabled.
421  Optional<TensorInfo> biases;
422  if (descriptor.m_BiasEnabled)
423  {
424  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
425  }
426 
429  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
431  biases,
432  baseLayer->GetParameters(),
433  &activationDesc);
434 
435  if (status)
436  {
437  FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
438  baseLayer,
439  activationLayer,
440  activationDesc,
441  name);
442  untouched.erase(baseLayer->GetGuid());
443  untouched.erase(activationLayer->GetGuid());
444  }
445  }
446  else if (base.GetType() == LayerType::BatchNormalization)
447  {
448  BatchNormalizationLayer* baseLayer =
449  PolymorphicDowncast<BatchNormalizationLayer*>(&base);
450 
453  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
454  baseLayer->m_Mean->GetTensorInfo(),
455  baseLayer->m_Variance->GetTensorInfo(),
456  baseLayer->m_Beta->GetTensorInfo(),
457  baseLayer->m_Gamma->GetTensorInfo(),
458  baseLayer->GetParameters(),
459  &activationDesc);
460 
461  if (status)
462  {
463  BatchNormalizationLayer* replacementLayer =
464  FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
465  baseLayer,
466  activationLayer,
467  activationDesc,
468  name);
469 
470  replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
471  replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
472  replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
473  replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
474 
475  untouched.erase(baseLayer->GetGuid());
476  untouched.erase(activationLayer->GetGuid());
477  }
478  }
479  else if (base.GetType() == LayerType::Addition)
480  {
481  AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
482 
486  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
487  &activationDesc);
488 
489  if (status)
490  {
491  FuseAdditionLayer<AdditionLayer>(optimizationViews,
492  baseLayer,
493  activationLayer,
494  activationDesc,
495  name);
496 
497  untouched.erase(baseLayer->GetGuid());
498  untouched.erase(activationLayer->GetGuid());
499  }
500  }
501  else if (base.GetType() == LayerType::Division)
502  {
503  DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
504 
508  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
509  &activationDesc);
510 
511  if (status)
512  {
513  FuseDivisionLayer<DivisionLayer>(optimizationViews,
514  baseLayer,
515  activationLayer,
516  activationDesc,
517  name);
518  untouched.erase(baseLayer->GetGuid());
519  untouched.erase(activationLayer->GetGuid());
520  }
521  }
522  else if (base.GetType() == LayerType::Multiplication)
523  {
524  MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
525 
529  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
530  &activationDesc);
531 
532  if (status)
533  {
534  FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
535  baseLayer,
536  activationLayer,
537  activationDesc,
538  name);
539  untouched.erase(baseLayer->GetGuid());
540  untouched.erase(activationLayer->GetGuid());
541  }
542  }
543  else if (base.GetType() == LayerType::Subtraction)
544  {
545  SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
546 
550  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
551  &activationDesc);
552 
553  if (status)
554  {
555  FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
556  baseLayer,
557  activationLayer,
558  activationDesc,
559  name);
560  untouched.erase(baseLayer->GetGuid());
561  untouched.erase(activationLayer->GetGuid());
562  }
563  }
564  else if (base.GetType() == LayerType::ElementwiseBinary)
565  {
566  ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
567 
568  if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
569  {
573  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
574  &activationDesc);
575 
576  if (status)
577  {
578  FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
579  baseLayer,
580  activationLayer,
581  activationDesc,
583  name);
584  untouched.erase(baseLayer->GetGuid());
585  untouched.erase(activationLayer->GetGuid());
586  }
587  }
588  else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
589  {
593  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
594  &activationDesc);
595 
596  if (status)
597  {
598  FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
599  baseLayer,
600  activationLayer,
601  activationDesc,
603  name);
604  untouched.erase(baseLayer->GetGuid());
605  untouched.erase(activationLayer->GetGuid());
606  }
607  }
608  else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
609  {
613  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
614  &activationDesc);
615 
616  if (status)
617  {
618  FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
619  baseLayer,
620  activationLayer,
621  activationDesc,
623  name);
624  untouched.erase(baseLayer->GetGuid());
625  untouched.erase(activationLayer->GetGuid());
626  }
627  }
628  else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
629  {
633  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
634  &activationDesc);
635 
636  if (status)
637  {
638  FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
639  baseLayer,
640  activationLayer,
641  activationDesc,
643  name);
644  untouched.erase(baseLayer->GetGuid());
645  untouched.erase(activationLayer->GetGuid());
646  }
647  }
648  // No fusion available for other BinaryOperations
649  }
650  }
651  }
652  }
653  }
654  }
655 
656  // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
657  if (base.GetType() == LayerType::Reduce)
658  {
659  ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
660  ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
661 
662  if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
663  {
664  // Add new layers to the graph and connect them.
665  std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
666  baseLayer,
667  reduceDescriptor);
668 
669  // Replace existing baselayer with new subgraph.
670  ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
671  untouched.erase(baseLayer->GetGuid());
672  }
673  }
674 
675  // Remove Reshape where possible
676  if (base.GetType() == LayerType::Reshape)
677  {
678  ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
679 
680  // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
681  if (ConnectedToLayerWithNCHW(baseLayer))
682  {
683  continue;
684  }
685  RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
686  }
687 
688  // Special case to fuse padding into average pooling 2d for quantized datatype.
689  // Required to be done as a backend specific optimization as Neon does not support this special case.
690  if (base.GetType() == LayerType::Pooling2d)
691  {
692  Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
693  Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();
694 
696  {
697  PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
699  if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
701  poolingDescriptor,
702  padLayer->GetOutputSlot().GetTensorInfo(),
703  true))
704  {
705  FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
706  poolingDescriptor, padLayer);
707  untouched.erase(baseLayer->GetGuid());
708  untouched.erase(padLayer->GetGuid());
709  }
710  }
711  }
712  }
713 
714  if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
715  {
716  optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
717  }
718  else
719  {
720  ReportUntouchedLayers(optimizationViews, untouched);
721  }
722 
723  return optimizationViews;
724 }
725 
726 } // namespace armnn
armnn::MemorySource::Malloc
@ Malloc
armnn::OptimizationViews::AddUntouchedSubgraph
void AddUntouchedSubgraph(SubgraphView &&subgraph)
Definition: OptimizationViews.hpp:48
armnn::ClBackend::GetLayerSupport
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: ClBackend.cpp:252
armnn::BinaryOperation::Mul
@ Mul
ClWorkloadFactory.hpp
armnn::ActivationDescriptor
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
armnn::FullyConnectedDescriptor
A FullyConnectedDescriptor for the FullyConnectedLayer.
Definition: Descriptors.hpp:507
armnn::BinaryOperation::Add
@ Add
armnn::ClBackend::m_UsingCustomAllocator
bool m_UsingCustomAllocator
Definition: ClBackend.hpp:284
ClDepthwiseConvolutionWorkload.hpp
armnn::LayerType::BatchNormalization
@ BatchNormalization
armnn::ClSubtractionValidate
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClSubtractionWorkload.cpp:46
armnn::Optional
Definition: Optional.hpp:270
Descriptors.hpp
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::DepthwiseConvolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:708
armnn::ClTensorHandleFactory::GetIdStatic
static const FactoryId & GetIdStatic()
Definition: ClTensorHandleFactory.cpp:94
armnn::ClBackend::RegisterTensorHandleFactories
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: ClBackend.cpp:175
armnn::BatchNormalizationLayer::m_Mean
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
Definition: BatchNormalizationLayer.hpp:19
armnn::SubtractionLayer
This layer represents a subtraction operation.
Definition: SubtractionLayer.hpp:14
armnn::IBackendInternal::IMemoryManagerSharedPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
Definition: IBackendInternal.hpp:99
armnn::ClMultiplicationWorkloadValidate
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClMultiplicationWorkload.cpp:18
armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:23
armnn::BackendCapabilities
BackendOptions BackendCapabilities
Definition: BackendOptions.hpp:19
armnn::DepthwiseConvolution2dLayer
This layer represents a depthwise convolution 2d operation.
Definition: DepthwiseConvolution2dLayer.hpp:15
ClLayerSupport.hpp
armnn::BinaryOperation::Sub
@ Sub
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
armnn::ClBackend::GetHandleFactoryPreferences
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: ClBackend.cpp:169
ClTensorHandleFactory.hpp
armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339
armnn::TensorHandleFactoryRegistry::RegisterMemoryManager
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
Definition: TensorHandleFactoryRegistry.cpp:34
ClReduceWorkload.hpp
ClBackendDefaultAllocator.hpp
BackendRegistry.hpp
armnn::ClFullyConnectedWorkloadValidate
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: ClFullyConnectedWorkload.cpp:19
armnn::ClConvolution2dWorkloadValidate
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Definition: ClConvolution2dWorkload.cpp:23
armnn::LayerType::Reduce
@ Reduce
armnn::BatchNormalizationLayer
This layer represents a batch normalization operation.
Definition: BatchNormalizationLayer.hpp:15
armnn::IBackendInternal::IBackendContextPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
Definition: IBackendInternal.hpp:90
Optimizer.hpp
armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
armnn::LayerWithParameters::GetParameters
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
Definition: LayerWithParameters.hpp:19
armnn::LayerType::ElementwiseBinary
@ ElementwiseBinary
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::ClBackend::GetDefaultAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
Definition: ClBackend.cpp:270
armnn::Convolution2dLayer
This layer represents a convolution 2d operation.
Definition: Convolution2dLayer.hpp:15
ClDivisionWorkload.hpp
armnn::ClBackend::CreateBackendProfilingContext
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: ClBackend.cpp:240
armnn::Layer
Definition: Layer.hpp:230
armnn::ClImportTensorHandleFactory::GetIdStatic
static const FactoryId & GetIdStatic()
Definition: ClImportTensorHandleFactory.cpp:93
armnn::ClAdditionValidate
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClAdditionWorkload.cpp:45
ClBackendModelContext.hpp
armnn::AdditionLayer
This layer represents an addition operation.
Definition: AdditionLayer.hpp:13
armnn::BatchNormalizationLayer::m_Gamma
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
Definition: BatchNormalizationLayer.hpp:25
armnn::ClBackendModelContext
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
Definition: ClBackendModelContext.hpp:28
armnn::Layer::GetAdditionalInformation
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:368
armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132
armnn::optimizations::pad_fold::TryFoldPadIntoLayer2d
bool TryFoldPadIntoLayer2d(const PadDescriptor &padDescriptor, Descriptor &layerDescriptor, const TensorInfo &tensorInfo)
Definition: FoldPadIntoLayer2d.hpp:88
armnn::ReshapeLayer
This layer represents a reshape operation.
Definition: ReshapeLayer.hpp:15
armnn::BatchNormalizationLayer::m_Variance
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
Definition: BatchNormalizationLayer.hpp:21
armnn::ClBackend::GetCapabilities
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
Definition: ClBackend.cpp:275
armnn::SubgraphView::begin
IConnectableLayerIterator begin()
Definition: SubgraphView.cpp:286
armnn::LayerType::Subtraction
@ Subtraction
armnn::FullyConnectedDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:526
armnn::RemoveReshapeLayer
void RemoveReshapeLayer(ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
Definition: SubgraphUtils.hpp:293
armnn::ReduceLayer
This layer represents a reduction operation.
Definition: ReduceLayer.hpp:14
armnn::MemorySource::Undefined
@ Undefined
armnn::MultiplicationLayer
This layer represents a multiplication operation.
Definition: MultiplicationLayer.hpp:14
armnn::OutputSlot::GetNumConnections
unsigned int GetNumConnections() const override
Definition: Layer.hpp:158
IBackendContext.hpp
PolymorphicDowncast.hpp
armnn::Convolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:582
ClFullyConnectedWorkload.hpp
armnn::ClLayerSupport
Definition: ClLayerSupport.hpp:14
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343
armnn::LayerType::Multiplication
@ Multiplication
armnn::ClBackend::m_CustomAllocator
std::shared_ptr< ClBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: ClBackend.hpp:283
armnn::SubgraphView
The SubgraphView class represents a subgraph of a Graph.
Definition: SubgraphView.hpp:31
armnn::LayerType::Addition
@ Addition
armnn::FullyConnectedLayer
This layer represents a fully connected operation.
Definition: FullyConnectedLayer.hpp:15
armnn::OptimizationViews
Definition: OptimizationViews.hpp:17
ArmComputeUtils.hpp
armnn::ElementwiseBinaryLayer
This layer represents an elementwiseBinary operation.
Definition: ElementwiseBinaryLayer.hpp:14
armnn::Pooling2dLayer
This layer represents a pooling 2d operation.
Definition: Pooling2dLayer.hpp:13
armnn::ClBackend::CreateBackendContext
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: ClBackend.cpp:235
armnn::DivisionLayer
This layer represents a division operation.
Definition: DivisionLayer.hpp:14
armnn::ClBackend::CreateWorkloadFactory
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: ClBackend.cpp:59
armnn::LayerType::Pooling2d
@ Pooling2d
ClBatchNormalizationFloatWorkload.hpp
armnn::LayerType::Division
@ Division
armnn::IBackendInternal::IBackendProfilingContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace.
Definition: IBackendInternal.hpp:92
armnn::ClBackend::OptimizeSubgraphView
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:292
ClConvolution2dWorkload.hpp
armnn::LayerType::FullyConnected
@ FullyConnected
armnn::BatchNormalizationLayer::m_Beta
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
Definition: BatchNormalizationLayer.hpp:23
ClBackendId.hpp
ClAdditionWorkload.hpp
armnn::LayerType::DepthwiseConvolution2d
@ DepthwiseConvolution2d
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Status
Status
Definition: Types.hpp:42
ClBackendContext.hpp
armnn::IBackendInternal::IBackendProfilingPtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
Definition: IBackendInternal.hpp:93
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:78
armnn::LayerType::Reshape
@ Reshape
armnn::Layer::BeginOutputSlots
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:266
armnn::IBackendInternal::IMemoryManagerUniquePtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
Definition: IBackendInternal.hpp:98
armnn::ReduceDescriptor::m_vAxis
std::vector< uint32_t > m_vAxis
The indices of the dimensions to reduce.
Definition: Descriptors.hpp:1556
armnn::ClBatchNormalizationValidate
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: ClBatchNormalizationFloatWorkload.cpp:19
ClBackend.hpp
armnn::ClDivisionWorkloadValidate
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClDivisionWorkload.cpp:18
armnn::OptimizationViews::GetSubstitutions
const Substitutions & GetSubstitutions() const
Definition: OptimizationViews.hpp:58
armnn::SubgraphView::end
IConnectableLayerIterator end()
Definition: SubgraphView.cpp:291
armnn::OptimizationViews::GetDeletedSubgraphs
const Subgraphs & GetDeletedSubgraphs() const
Definition: OptimizationViews.hpp:61
armnn::ElementwiseBinaryDescriptor::m_Operation
BinaryOperation m_Operation
Specifies the elementwiseBinary operation to execute.
Definition: Descriptors.hpp:125
armnn::BackendId
Definition: BackendId.hpp:75
ClSubtractionWorkload.hpp
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::ClBackend::GetIdStatic
static const BackendId & GetIdStatic()
Definition: ClBackend.cpp:44
armnn::LayerType::Pad
@ Pad
ClImportTensorHandleFactory.hpp
armnn::TensorHandleFactoryRegistry::RegisterFactory
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
Definition: TensorHandleFactoryRegistry.cpp:12
armnn::ClBackend::CreateBackendSpecificModelContext
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:246
ClMultiplicationWorkload.hpp
armnn::ClBackendId
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10
armnn::IBackendInternal::ILayerSupportSharedPtr
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
Definition: IBackendInternal.hpp:94
armnn::TensorHandleFactoryRegistry::RegisterCopyAndImportFactoryPair
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Ids: one for Memory Copy and one for Memory Import.
Definition: TensorHandleFactoryRegistry.cpp:66
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::ReportUntouchedLayers
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
Definition: SubgraphUtils.hpp:220
armnn::ConnectedToLayerWithNCHW
bool ConnectedToLayerWithNCHW(Layer *baseLayer)
Checks if the Layer is connected to any Layer that has an NCHW layout.
Definition: SubgraphUtils.hpp:250
armnn::BinaryOperation::Div
@ Div
armnn::LayerType::Convolution2d
@ Convolution2d
armnn::Pooling2dDescriptor
A Pooling2dDescriptor for the Pooling2dLayer.
Definition: Descriptors.hpp:371
armnn::LayerType::Activation
@ Activation
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::ReduceDescriptor
A ReduceDescriptor for the REDUCE operators.
Definition: Descriptors.hpp:1538
armnn::ClDepthwiseConvolutionWorkloadValidate
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
Definition: ClDepthwiseConvolutionWorkload.cpp:26
armnn::ClBackendContext
Definition: ClBackendContext.hpp:17
armnn::Layer::EndOutputSlots
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:267
IMemoryManager.hpp
ArmComputeSubgraphUtils.hpp
armnn::PadLayer
This layer represents a pad operation.
Definition: PadLayer.hpp:14
armnn::IBackendInternal::IBackendSpecificModelContextPtr
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
Definition: IBackendInternal.hpp:96
armnn::ClBackendModelContext::IsFastMathEnabled
bool IsFastMathEnabled() const
Definition: ClBackendModelContext.cpp:66
armnn::ClBackend::CreateMemoryManager
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: ClBackend.cpp:50