ArmNN
 25.11
Loading...
Searching...
No Matches
NeonBackend.cpp
Go to the documentation of this file.
1//
2// Copyright © 2017-2025 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "NeonBackend.hpp"
7#include "NeonBackendId.hpp"
10#include "NeonLayerSupport.hpp"
13
15#include <armnn/Descriptors.hpp>
16
20
23
25
37
38#include <Optimizer.hpp>
39
40#include <arm_compute/core/Types.h>
41#include <arm_compute/runtime/Allocator.h>
42
43namespace armnn
44{
45
47{
48 static const BackendId s_Id{NeonBackendId()};
49 return s_Id;
50}
51
53{
54 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
56}
57
59 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
60{
61 return std::make_unique<NeonWorkloadFactory>(
63}
64
66 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
67{
68 return std::make_unique<NeonWorkloadFactory>(
70}
71
73 class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
74{
75 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
77
78 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
79
80 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
81 // Register copy and import factory pair
82 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
83 // Register the factory
84 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
85
86
87 return std::make_unique<NeonWorkloadFactory>(
89}
90
92 TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
93{
94 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
96
97 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
98
99 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
100 // Register copy and import factory pair
101 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
102 // Register the factory
103 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
104
105 return std::make_unique<NeonWorkloadFactory>(
107}
108
113
119
125
134
136{
137 static ILayerSupportSharedPtr layerSupport
138 {
140 };
141 return layerSupport;
142}
143
// NOTE(review): doxygen-extraction listing. The opening signature line
// ("OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph, ...")
// and many interior lines (the baseLayer PolymorphicDowncast declarations, the
// heads of the Fuse*Layer<> calls, several validate arguments) were dropped by the
// extraction, and each surviving line carries a fused original line number.
// Verify against upstream NeonBackend.cpp before editing.
//
// What the visible code does:
//  1) fuses an immediately-following Activation layer into supported compute
//     layers (Convolution2d, DepthwiseConvolution2d, FullyConnected,
//     BatchNormalization, Addition/Division/Multiplication/Subtraction,
//     ElementwiseBinary Add/Div/Mul/Sub) when the corresponding
//     Neon*WorkloadValidate accepts the fused descriptor,
//  2) splits a multi-axis Reduce into a chain of single-axis Reduce layers,
//  3) removes Reshape layers not connected to NCHW-layout layers,
//  4) replaces Add/Mul/Add sequences with a single Fused (AddMulAdd) kernel.
145 const ModelOptions& modelOptions) const
146{
147 OptimizationViews optimizationViews(modelOptions);
148
// First pass: record every layer of the subgraph so we can later tell which
// layers were left untouched by the optimizations below.
149 auto it = subgraph.end();
150 std::map<LayerGuid, Layer*> untouched;
151
152 while (it != subgraph.begin())
153 {
154 --it;
155 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
156 untouched.insert({base.GetGuid(), &base});
157 }
158
// Second pass: walk the subgraph backwards attempting each optimization.
159 it = subgraph.end();
160 while (it != subgraph.begin())
161 {
162 --it;
163 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
164
165 // Fuse activation into previous layer if supported by backend
171 && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
172 {
173 for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
174 {
// Only fuse when the output feeds exactly one consumer.
175 if (output->GetNumConnections() == 1)
176 {
177 for (auto&& childInput : output->GetConnections())
178 {
179 if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
180 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
181 {
182 Layer& child = childInput->GetOwningLayer();
183
184 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
185 // Before we proceed make sure that this activation layer is in the subgraph. It could be
186 // the first layer in the next subgraph.
187 if (untouched.find(activationLayer->GetGuid()) == untouched.end())
188 {
189 // We can't fuse a layer that's outside the subgraph.
190 break;
191 }
192 const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
193 base.GetName();
194
195 // Get params from activation layer
196 ActivationDescriptor activationDesc = activationLayer->GetParameters();
197
198 if (base.GetType() == LayerType::Convolution2d)
199 {
201
203
204 if (baseLayer->GetParameters().m_BiasEnabled)
205 {
206 biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
207 }
208
209 arm_compute::Status status = NeonConvolution2dWorkloadValidate(
211 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
212 baseLayer->GetParameters(),
214 biases,
215 false,
216 &activationDesc);
217
218 if (status)
219 {
221 baseLayer,
222 activationLayer,
223 activationDesc,
224 name);
225 untouched.erase(baseLayer->GetGuid());
226 untouched.erase(activationLayer->GetGuid());
227 }
228 }
230 {
231 DepthwiseConvolution2dLayer* baseLayer =
233
235
236 if (baseLayer->GetParameters().m_BiasEnabled)
237 {
238 biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
239 }
240
241 arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
243 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
244 baseLayer->GetParameters(),
246 biases,
247 &activationDesc);
248
249 if (status)
250 {
252 baseLayer,
253 activationLayer,
254 activationDesc,
255 name);
256 untouched.erase(baseLayer->GetGuid());
257 untouched.erase(activationLayer->GetGuid());
258 }
259 }
260 else if (base.GetType() == LayerType::FullyConnected)
261 {
263 FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
264
265 // As bias is optional only try to get TensorInfo from input if bias is enabled.
267 if (descriptor.m_BiasEnabled)
268 {
269 biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
270 }
271
272 arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
274 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
276 biases,
277 baseLayer->GetParameters(),
278 &activationDesc);
279
280 if (status)
281 {
283 baseLayer,
284 activationLayer,
285 activationDesc,
286 name);
287 untouched.erase(baseLayer->GetGuid());
288 untouched.erase(activationLayer->GetGuid());
289 }
290 }
291 else if (base.GetType() == LayerType::BatchNormalization)
292 {
293 BatchNormalizationLayer* baseLayer =
295
296 arm_compute::Status status = NeonBatchNormalizationValidate(
298 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
299 baseLayer->m_Mean->GetTensorInfo(),
300 baseLayer->m_Variance->GetTensorInfo(),
301 baseLayer->m_Beta->GetTensorInfo(),
302 baseLayer->m_Gamma->GetTensorInfo(),
303 baseLayer->GetParameters(),
304 &activationDesc);
305
306 if (status)
307 {
308 BatchNormalizationLayer* replacementLayer =
310 baseLayer,
311 activationLayer,
312 activationDesc,
313 name);
314
// The fused replacement takes over the original layer's constant tensors.
315 replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
316 replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
317 replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
318 replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
319 untouched.erase(baseLayer->GetGuid());
320 untouched.erase(activationLayer->GetGuid());
321 }
322 }
323 else if (base.GetType() == LayerType::Addition)
324 {
326
327 arm_compute::Status status = NeonAdditionWorkloadValidate(
330 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
331 &activationDesc);
332
333 if (status)
334 {
335 FuseAdditionLayer<AdditionLayer>(optimizationViews,
336 baseLayer,
337 activationLayer,
338 activationDesc,
339 name);
340 untouched.erase(baseLayer->GetGuid());
341 untouched.erase(activationLayer->GetGuid());
342 }
343 }
344 else if (base.GetType() == LayerType::Division)
345 {
347
348 arm_compute::Status status = NeonDivisionWorkloadValidate(
351 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
352 &activationDesc);
353
354 if (status)
355 {
356 FuseDivisionLayer<DivisionLayer>(optimizationViews,
357 baseLayer,
358 activationLayer,
359 activationDesc,
360 name);
361 untouched.erase(baseLayer->GetGuid());
362 untouched.erase(activationLayer->GetGuid());
363 }
364 }
365 else if (base.GetType() == LayerType::Multiplication)
366 {
368
369 arm_compute::Status status = NeonMultiplicationWorkloadValidate(
372 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
373 &activationDesc);
374
375 if (status)
376 {
378 baseLayer,
379 activationLayer,
380 activationDesc,
381 name);
382 untouched.erase(baseLayer->GetGuid());
383 untouched.erase(activationLayer->GetGuid());
384 }
385 }
386 else if (base.GetType() == LayerType::Subtraction)
387 {
389
390 arm_compute::Status status = NeonSubtractionWorkloadValidate(
393 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
394 &activationDesc);
395
396 if (status)
397 {
399 baseLayer,
400 activationLayer,
401 activationDesc,
402 name);
403 untouched.erase(baseLayer->GetGuid());
404 untouched.erase(activationLayer->GetGuid());
405 }
406 }
407 else if (base.GetType() == LayerType::ElementwiseBinary)
408 {
410
412 {
413 arm_compute::Status status = NeonAdditionWorkloadValidate(
416 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
417 &activationDesc);
418
419 if (status)
420 {
422 baseLayer,
423 activationLayer,
424 activationDesc,
426 name);
427 untouched.erase(baseLayer->GetGuid());
428 untouched.erase(activationLayer->GetGuid());
429 }
430 }
431 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
432 {
433 arm_compute::Status status = NeonDivisionWorkloadValidate(
436 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
437 &activationDesc);
438
439 if (status)
440 {
442 baseLayer,
443 activationLayer,
444 activationDesc,
446 name);
447 untouched.erase(baseLayer->GetGuid());
448 untouched.erase(activationLayer->GetGuid());
449 }
450 }
451 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
452 {
453 arm_compute::Status status = NeonMultiplicationWorkloadValidate(
456 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
457 &activationDesc);
458
459 if (status)
460 {
462 baseLayer,
463 activationLayer,
464 activationDesc,
466 name);
467 untouched.erase(baseLayer->GetGuid());
468 untouched.erase(activationLayer->GetGuid());
469 }
470 }
471 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
472 {
473 arm_compute::Status status = NeonSubtractionWorkloadValidate(
476 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
477 &activationDesc);
478
479 if (status)
480 {
482 baseLayer,
483 activationLayer,
484 activationDesc,
486 name);
487 untouched.erase(baseLayer->GetGuid());
488 untouched.erase(activationLayer->GetGuid());
489 }
490 }
491 // No fusion available for other BinaryOperations
492 }
493 }
494 }
495 }
496 }
497 }
498
499 // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
500 if (base.GetType() == LayerType::Reduce)
501 {
503 ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
504
505 if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
506 {
507 // Add new layers to the graph and connect them.
508 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
509 baseLayer,
510 reduceDescriptor);
511
512 // Replace existing baselayer with new subgraph.
513 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
514 untouched.erase(baseLayer->GetGuid());
515 }
516 }
517
518 // Remove Reshape where possible
519 if (base.GetType() == LayerType::Reshape)
520 {
522
523 // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
524 if (ConnectedToLayerWithNCHW(baseLayer))
525 {
526 continue;
527 }
528 RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
529 }
530
531 // Replace Add/Mul/Add where possible
532 Layer* layerList[4] = {nullptr, nullptr, nullptr, nullptr};
533 const std::vector<ActivationFunction> validActivates = { ActivationFunction::ReLu,
537 layerList,
538 true, // handleValidActivates
539 validActivates))
540 {
541 bool fuseReLu = false;
542 unsigned int numInputs = 0;
543 unsigned int numOutputs = 0;
544 std::vector<TensorInfo> inputInfos;
545 std::vector<TensorInfo> outputInfos;
546 const ActivationDescriptor* activationDescriptor = nullptr;
547
549 numInputs,
550 numOutputs,
551 inputInfos,
552 outputInfos,
553 activationDescriptor,
554 fuseReLu))
555 {
556 // Create the new Add/Mul/Add layer and set the Relu activation function
557 FusedDescriptor fusedDescriptor(numInputs, numOutputs, FusedKernelType::AddMulAdd);
558 arm_compute::Status status = NeonFusedWorkloadValidate({inputInfos.begin(), inputInfos.end()},
559 {outputInfos.begin(), outputInfos.end()},
560 fusedDescriptor,
561 activationDescriptor);
562 if (status)
563 {
564 std::string fusedName;
565 GetFusedName(layerList, fusedName);
566
567 IConnectableLayer* addMulAddLayer =
568 optimizationViews.GetINetwork()->AddFusedLayer(fusedDescriptor, fusedName.c_str());
569
570 if (fuseReLu)
571 {
572 FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
573 addMulAddFusedLayer->SetAdditionalInfoForObject(
574 std::make_shared<ActivationDescriptor>(*activationDescriptor));
575 }
576
577 // Update the graph
578 std::vector<IConnectableLayer*> originalLayers;
579 for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
580 {
581 if (layerList[layerIdx])
582 {
583 originalLayers.push_back(layerList[layerIdx]);
584 }
585 }
586
587 std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
589 outputInfos.size() > 1,
590 inputLayersSlotLists,
591 outputLayersSlotLists);
592
593 ReplaceMultipleLayers<FusedLayer>(optimizationViews,
594 originalLayers,
595 PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
596 inputLayersSlotLists,
597 outputLayersSlotLists);
598
599 // Remove unused layers
600 for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
601 {
602 if (layerList[layerIdx])
603 {
604 untouched.erase(layerList[layerIdx]->GetGuid());
605 }
606 }
607 }
608 }
609 }
610 }
611
// If nothing was substituted or deleted, hand the whole subgraph back untouched;
// otherwise report the layers that were not part of any substitution.
612 if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
613 {
614 optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
615 }
616 else
617 {
618 ReportUntouchedLayers(optimizationViews, untouched);
619 }
620
621 return optimizationViews;
622}
623
624std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
625{
626 return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
627}
628
630{
631 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
633
634 registry.RegisterMemoryManager(memoryManager);
635
636 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
637 // Register copy and import factory pair
638 registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
639 // Register the factory
640 registry.RegisterFactory(std::move(factory));
641}
642
643std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
644{
645 return std::make_unique<DefaultAllocator>();
646}
647
648
649} // namespace armnn
This layer represents an addition operation.
This layer represents a batch normalization operation.
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
This layer represents a convolution 2d operation.
This layer represents a depthwise convolution 2d operation.
This layer represents a division operation.
This layer represents an elementwiseBinary operation.
This layer represents a fully connected operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition INetwork.hpp:81
IConnectableLayer * AddFusedLayer(const FusedDescriptor &fusedDescriptor, const char *name=nullptr)
Adds a Fused layer to the network.
Definition Network.cpp:338
const OutputSlot * GetConnectedOutputSlot() const
Definition Layer.hpp:56
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition Layer.hpp:266
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition Layer.hpp:343
std::shared_ptr< T > GetAdditionalInformation() const
Definition Layer.hpp:368
const char * GetName() const override
Returns the name of the layer.
Definition Layer.hpp:332
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition Layer.hpp:286
void SetAdditionalInfoForObject(const AdditionalInfoObjectPtr &additionalInfo)
Definition Layer.hpp:373
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition Layer.hpp:267
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
This layer represents a multiplication operation.
static const BackendId & GetIdStatic()
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
static const FactoryId & GetIdStatic()
void AddUntouchedSubgraph(SubgraphView &&subgraph)
const Subgraphs & GetDeletedSubgraphs() const
const Substitutions & GetSubstitutions() const
const TensorInfo & GetTensorInfo() const override
Definition Layer.cpp:100
This layer represents a reduction operation.
This layer represents a reshape operation.
The SubgraphView class represents a subgraph of a Graph.
IConnectableLayerIterator begin()
IConnectableLayerIterator end()
This layer represents a subtraction operation.
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Id for Memory Copy and TensorHandleFactory Id for Memory Impor...
Copyright (c) 2021 ARM Limited and Contributors.
void GetFusedName(Layer *layerList[4], std::string &fusedName)
void BuildAddMulAddSlotLists(bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
LayerType * FuseMultiplicationLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
void ReplaceMultipleLayers(OptimizationViews &optimizationViews, std::vector< IConnectableLayer * > &originalLayers, LayerType *baseLayer, const std::vector< SlotList > inputLayersSlotLists, const std::vector< SlotList > outputLayersSlotLists)
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
LayerType * FuseConvolution2dLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
LayerType * FuseAdditionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
auto PolymorphicPointerDowncast(const SourceType &value)
Polymorphic downcast for shared pointers and build in pointers.
LayerType * FuseBatchNormalizationLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
bool IsLayerSequence(Layer &currentLayer, TYPE first, TYPE second, TYPE third, Layer *layerList[4], bool handleValidActivates, const std::vector< ActivationFunction > &validActivates)
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
Definition Types.hpp:92
std::vector< BackendOptions > ModelOptions
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
void RemoveReshapeLayer(ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
std::vector< IConnectableLayer * > ChainReduceLayers(OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
LayerType * FuseFullyConnectedLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
void ReplaceLayers(OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer * > &layers)
LayerType * FuseDivisionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
bool ConnectedToLayerWithNCHW(Layer *baseLayer)
Checks if the Layer is connected to any Layer that has an NCHW layout.
arm_compute::Status NeonFusedWorkloadValidate(const std::vector< std::reference_wrapper< TensorInfo > > &inputInfos, const std::vector< std::reference_wrapper< TensorInfo > > &outputInfos, const FusedDescriptor &fusedDescriptor, const ActivationDescriptor *activationDescriptor)
bool BuildAddMulAddTensorInfoLists(Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
LayerType * FuseSubtractionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
LayerType * FuseDepthwiseConvolution2dLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
constexpr const char * NeonBackendId()
LayerType * FuseElementwiseBinaryLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, BinaryOperation operation, std::string name)
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
An ActivationDescriptor for the ActivationLayer.
bool m_BiasEnabled
Enable/disable bias.
bool m_BiasEnabled
Enable/disable bias.
BinaryOperation m_Operation
Specifies the elementwiseBinary operation to execute.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A FusedDescriptor for the FusedLayer.
A ReduceDescriptor for the REDUCE operators.
std::vector< uint32_t > m_vAxis
The indices of the dimensions to reduce.