ClBackend.cpp
//
// Copyright © 2017-2025 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClBackend.hpp"
#include "ClBackendContext.hpp"
#include "ClBackendDefaultAllocator.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
#include "ClWorkloadFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>

namespace armnn
{

const BackendId& ClBackend::GetIdStatic()
{
    static const BackendId s_Id{ClBackendId()};
    return s_Id;
}

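// The memory manager wraps either an application-supplied custom allocator or
// ACL's default CLBufferAllocator. A minimal sketch of registering a custom
// allocator for this backend, assuming an ICustomAllocator implementation
// named MyClAllocator (hypothetical name):
//
//     armnn::IRuntime::CreationOptions options;
//     options.m_CustomAllocatorMap["GpuAcc"] = std::make_shared<MyClAllocator>();
//     armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);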
IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
    if (m_UsingCustomAllocator)
    {
        return std::make_unique<ClMemoryManager>(m_CustomAllocator);
    }
    return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

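// The registry-based overloads below also create the backend's tensor handle
// factories: ClTensorHandleFactory for copy-based handles and
// ClImportTensorHandleFactory for zero-copy import. Registering them as a
// copy/import pair, in both directions, lets the optimizer fall back to an
// explicit copy whenever importing a caller-supplied buffer is not possible.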
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

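// This overload takes explicit memory-source flags; Undefined flags are
// promoted to Malloc below so that import of host-allocated buffers can still
// be attempted ("force import").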
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry,
    const ModelOptions& modelOptions,
    MemorySourceFlags inputFlags,
    MemorySourceFlags outputFlags) const
{
    // To allow force import when inputFlags/outputFlags are Undefined, default them to Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

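// The order is significant: ids are returned in preference order, so the
// copy-based ClTensorHandleFactory is preferred over the import factory unless
// import has been requested explicitly.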
std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
                                                         ClImportTensorHandleFactory::GetIdStatic()};
}

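// Per the IBackendInternal contract this registration path is optional: either
// these methods or CreateMemoryManager() plus the workload factory path is
// used by the runtime to provide tensor handles.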
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

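// As above, but honouring caller-supplied input/output memory-source flags
// (again defaulting Undefined to Malloc to allow force import).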
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                              MemorySourceFlags inputFlags,
                                              MemorySourceFlags outputFlags)
{
    // To allow force import when inputFlags/outputFlags are Undefined, default them to Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

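// The model context interprets GpuAcc-specific ModelOptions, for example
// "FastMathEnabled", which is read back in OptimizeSubgraphView below.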
IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new ClBackendContext{options}};
}

IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
{
    return std::make_unique<ClBackendDefaultAllocator>();
}

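// Static capability table advertised to the runtime. A sketch of how a caller
// might test a single entry, using armnn::HasMatchingCapability from
// armnn/BackendHelper.hpp (usage assumed, not shown in this file):
//
//     armnn::BackendCapabilities caps = backend.GetCapabilities();
//     bool nonConstWeights = armnn::HasMatchingCapability(
//         armnn::BackendOptions::BackendOption("NonConstWeights", true), caps);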
BackendCapabilities ClBackend::GetCapabilities() const
{
    // Add new capabilities here.
    return BackendCapabilities ("GpuAcc",
                                {
                                    {"NonConstWeights", true},
                                    {"ProtectedContentAllocation", true},
                                    {"ConstantTensorsAsInputs", true},
                                    {"PreImportIOTensors", false},
                                    {"ExternallyManagedMemory", true},
                                    {"MultiAxisPacking", false},
                                    {"SingleAxisPacking", true},
                                    {"AllOrNothing", false},
                                    {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
                                });
}

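// OptimizeSubgraphView walks the subgraph backwards and applies the GpuAcc
// specific rewrites:
//  * fuse a following Activation layer into Convolution2d,
//    DepthwiseConvolution2d, FullyConnected, BatchNormalization, Addition,
//    Division, Multiplication, Subtraction or ElementwiseBinary where the
//    ACL validator accepts the fused configuration;
//  * split a Reduce over several axes into a chain of single-axis Reduce layers;
//  * remove Reshape layers where possible;
//  * fold a preceding Pad into Pooling2d for the supported quantized cases.
// Layers that remain in 'untouched' are reported back so the optimizer keeps
// them as they are.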
OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                  const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    auto it = subgraph.end();
    bool isFastMathEnabled = false;
    std::map<LayerGuid, Layer*> untouched;

    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    it = subgraph.end();
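    // "FastMathEnabled" is a GpuAcc ModelOption: when set, ACL may trade a
    // little accuracy for speed (e.g. Winograd convolution), and the flag is
    // also forwarded to the fused-convolution validation further down.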
#if defined(ARMCOMPUTECL_ENABLED)
    IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);

    if (modelContextPtr)
    {
        auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
        if (clModelOptions)
        {
            isFastMathEnabled = clModelOptions->IsFastMathEnabled();
        }
    }
#endif
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
            || base.GetType() == LayerType::ElementwiseBinary)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
                            // Before we proceed make sure that this activation layer is in the subgraph. It could be
                            // the first layer in the next subgraph.
                            if (untouched.find(activationLayer->GetGuid()) == untouched.end())
                            {
                                // We can't fuse a layer that's outside the subgraph.
                                break;
                            }

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

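                            // Each branch below validates the base layer together
                            // with the activation through the matching ACL workload
                            // validator; the pair is replaced by a single fused
                            // layer only when validation succeeds.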
                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        isFastMathEnabled,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                    PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                FullyConnectedDescriptor descriptor = baseLayer->GetParameters();

                                // As bias is optional only try to get TensorInfo from input if bias is enabled.
                                Optional<TensorInfo> biases;
                                if (descriptor.m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                    PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = ClBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                        FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                             baseLayer,
                                                                                             activationLayer,
                                                                                             activationDesc,
                                                                                             name);

                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = ClAdditionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = ClDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = ClSubtractionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::ElementwiseBinary)
                            {
                                ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);

                                if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
                                {
                                    arm_compute::Status status = ClAdditionValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Add,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
                                {
                                    arm_compute::Status status = ClDivisionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Div,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
                                {
                                    arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Mul,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
                                {
                                    arm_compute::Status status = ClSubtractionValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Sub,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                // No fusion available for other BinaryOperations
                            }
                        }
                    }
                }
            }
        }

        // Separate a reduce layer with multiple axes into multiple reduce layers with 1 axis each.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace the existing base layer with the new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }

        // Remove Reshape where possible
        if (base.GetType() == LayerType::Reshape)
        {
            ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);

            // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
            if (ConnectedToLayerWithNCHW(baseLayer))
            {
                continue;
            }
            RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
        }
        // Special case to fuse padding into average pooling 2d for quantized datatypes.
        // Required to be done as a backend-specific optimization as Neon does not support this special case.
        if (base.GetType() == LayerType::Pooling2d)
        {
            Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
            Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();

            if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)
            {
                PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
                    &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());
                if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
                    TryFoldPadIntoLayer2d(padLayer->GetParameters(),
                                          poolingDescriptor,
                                          padLayer->GetOutputSlot().GetTensorInfo(),
                                          true))
                {
                    FoldPadLayer2d<Pooling2dLayer, Pooling2dDescriptor>(optimizationViews, baseLayer,
                                                                        poolingDescriptor, padLayer);
                    untouched.erase(baseLayer->GetGuid());
                    untouched.erase(padLayer->GetGuid());
                }
            }
        }
    }

    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn