#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>
return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                           BaseMemoryManager::MemoryAffinity::Offset);
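For orientation, this return statement is the entire body of NeonBackend::CreateMemoryManager(). A minimal sketch of the override, assuming the NeonMemoryManager and BaseMemoryManager types used elsewhere in this file:

    IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
    {
        // Wrap Arm Compute Library's CPU allocator in ArmNN's Neon memory manager.
        // MemoryAffinity::Offset lets tensors share large allocations by offset.
        return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                   BaseMemoryManager::MemoryAffinity::Offset);
    }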
// Overload taking a shared IMemoryManager:
return std::make_unique<NeonWorkloadFactory>(
    PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));

// Overload additionally taking ModelOptions:
return std::make_unique<NeonWorkloadFactory>(
    PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager),
    CreateBackendSpecificModelContext(modelOptions));
auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                         BaseMemoryManager::MemoryAffinity::Offset);

auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);

return std::make_unique<NeonWorkloadFactory>(
    PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                         BaseMemoryManager::MemoryAffinity::Offset);

auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);

return std::make_unique<NeonWorkloadFactory>(
    PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager),
    CreateBackendSpecificModelContext(modelOptions));
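The registry-taking overloads of CreateWorkloadFactory also register the memory manager and tensor-handle factory before constructing the workload factory. A hedged sketch of that wiring, assuming the TensorHandleFactoryRegistry methods RegisterMemoryManager, RegisterCopyAndImportFactoryPair and RegisterFactory:

    IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
        TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
    {
        auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                                 BaseMemoryManager::MemoryAffinity::Offset);
        tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);

        auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
        // Neon tensor handles serve both roles, so one factory id fills the
        // copy and the import slot of the pair.
        tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
        tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));

        return std::make_unique<NeonWorkloadFactory>(
            PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
    }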
auto it = subgraph.end();
std::map<LayerGuid, Layer*> untouched;

// First pass: record every layer in the subgraph so fused layers can be
// removed from this map as they are consumed.
while (it != subgraph.begin())
{
    --it;
    Layer& base = *(PolymorphicDowncast<Layer*>(*it));
    untouched.insert({base.GetGuid(), &base});
}

// Second pass: walk the subgraph again looking for fusion opportunities.
it = subgraph.end();
while (it != subgraph.begin())
// For each output slot of a fusable base layer (BeginOutputSlots() to EndOutputSlots()):
if (output->GetNumConnections() == 1)
{
    for (auto&& childInput : output->GetConnections())
    {
        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
        {
            Layer& child = childInput->GetOwningLayer();

            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
            // The activation may be the first layer of the next subgraph;
            // only fuse it if it is still in this subgraph's untouched map.
            if (untouched.find(activationLayer->GetGuid()) == untouched.end())
            {
                break;
            }
// e.g. a ReLU named "relu1" following a convolution named "conv1"
// yields the layer name "fused-relu1-into-conv1".
const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                         base.GetName();
// Convolution2d + Activation: in the NeonConvolution2dWorkloadValidate() call,
// the activation's output TensorInfo stands in as the fused workload's output:
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

// On success both layers leave the untouched map:
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
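For context, a hedged reconstruction of the complete Convolution2d case, built from the fragments above and assuming the NeonConvolution2dWorkloadValidate and FuseConvolution2dLayer helpers from ArmNN's Neon backend utilities:

    Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

    Optional<TensorInfo> biases;
    if (baseLayer->GetParameters().m_BiasEnabled)
    {
        biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
    }

    bool isFastMathEnabled = false;  // taken from NeonBackendModelContext/ModelOptions in the full source

    arm_compute::Status status = NeonConvolution2dWorkloadValidate(
        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
        baseLayer->GetParameters(),
        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),  // weights
        biases,
        isFastMathEnabled,
        &activationDesc);

    if (status)
    {
        FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews, baseLayer,
                                                   activationLayer, activationDesc, name);
        untouched.erase(baseLayer->GetGuid());
        untouched.erase(activationLayer->GetGuid());
    }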
// DepthwiseConvolution2d + Activation: validated with
// NeonDepthwiseConvolutionWorkloadValidate(), fused via FuseDepthwiseConvolution2dLayer():
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
// FullyConnected + Activation: validated with NeonFullyConnectedWorkloadValidate(),
// fused via FuseFullyConnectedLayer():
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
// BatchNormalization + Activation: every constant tensor takes part in validation.
arm_compute::Status status = NeonBatchNormalizationValidate(
    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    baseLayer->m_Mean->GetTensorInfo(),
    baseLayer->m_Variance->GetTensorInfo(),
    baseLayer->m_Beta->GetTensorInfo(),
    baseLayer->m_Gamma->GetTensorInfo(),
    baseLayer->GetParameters(),
    &activationDesc);

// replacementLayer is the fused layer returned by FuseBatchNormalizationLayer();
// it takes ownership of the constant tensors.
replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
replacementLayer->m_Variance = std::move(baseLayer->m_Variance);

untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
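The fused activation is not kept as a separate layer; it travels with the replacement layer as additional information. A sketch of how a workload might read it back, assuming Layer::GetAdditionalInformation<ActivationDescriptor>(); the exact retrieval path inside ArmNN's Neon workloads may differ:

    // Inside a Neon workload's setup (sketch):
    auto fusedActivation = layer.GetAdditionalInformation<ActivationDescriptor>();
    if (fusedActivation)
    {
        // Translate the ArmNN descriptor into arm_compute's ActivationLayerInfo
        // and pass it to the configured function, so the activation runs inside
        // the same kernel instead of as a separate layer.
    }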
// The arithmetic layers repeat the identical pattern: Addition, Division,
// Multiplication and Subtraction are each validated by the matching
// Neon*WorkloadValidate() and fused by the matching Fuse*Layer() helper:
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
// ElementwiseBinary + Activation repeats once per supported BinaryOperation
// (Add, Div, Mul, Sub), fused via FuseElementwiseBinaryLayer():
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
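For ElementwiseBinary layers, the descriptor's m_Operation selects the validator while FuseElementwiseBinaryLayer performs the substitution. A condensed sketch, assuming the four Neon*WorkloadValidate helpers; the input0/input1/fusedOutput locals are introduced here for brevity:

    const TensorInfo& input0 = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
    const TensorInfo& input1 = baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
    const TensorInfo& fusedOutput =
        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

    // Start from an error status so an unhandled operation cannot fuse.
    arm_compute::Status status(arm_compute::ErrorCode::RUNTIME_ERROR, "unsupported operation");
    switch (baseLayer->GetParameters().m_Operation)
    {
        case BinaryOperation::Add:
            status = NeonAdditionWorkloadValidate(input0, input1, fusedOutput, &activationDesc);
            break;
        case BinaryOperation::Div:
            status = NeonDivisionWorkloadValidate(input0, input1, fusedOutput, &activationDesc);
            break;
        case BinaryOperation::Mul:
            status = NeonMultiplicationWorkloadValidate(input0, input1, fusedOutput, &activationDesc);
            break;
        case BinaryOperation::Sub:
            status = NeonSubtractionWorkloadValidate(input0, input1, fusedOutput, &activationDesc);
            break;
        default:
            break;
    }

    if (status)
    {
        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews, baseLayer,
                                                           activationLayer, activationDesc,
                                                           baseLayer->GetParameters().m_Operation,
                                                           name);
        untouched.erase(baseLayer->GetGuid());
        untouched.erase(activationLayer->GetGuid());
    }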
// A Reduce over several axes is expanded into a chain of single-axis
// Reduce layers before substitution.
if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
{
    // ChainReduceLayers()/ReplaceLayers() build and substitute the chain here.
    untouched.erase(baseLayer->GetGuid());
}
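A sketch of the multi-axis expansion, assuming the ChainReduceLayers and ReplaceLayers helpers from ArmNN's Arm Compute subgraph utilities; each generated layer reduces a single axis, and the chain replaces the original layer:

    ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
    ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

    if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
    {
        // One Reduce layer per entry of m_vAxis, connected head to tail.
        std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                baseLayer,
                                                                                reduceDescriptor);
        // Substitute the chain for the original multi-axis layer.
        ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
        untouched.erase(baseLayer->GetGuid());
    }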
// Replace sequences of (Add, Mul, Add), optionally ending in a ReLu or
// BoundedReLu, with a single fused AddMulAdd layer.
Layer* layerList[4] = {nullptr, nullptr, nullptr, nullptr};

bool fuseReLu = false;
unsigned int numInputs = 0;
unsigned int numOutputs = 0;
std::vector<TensorInfo> inputInfos;
std::vector<TensorInfo> outputInfos;
const ActivationDescriptor* activationDescriptor = nullptr;
// Gather the sequence's tensor infos; BuildAddMulAddTensorInfoLists() also
// reports whether a trailing ReLU can be folded in (fuseReLu).
BuildAddMulAddTensorInfoLists<Layer>(layerList, numInputs, numOutputs, inputInfos, outputInfos,
                                     activationDescriptor, fuseReLu);

FusedDescriptor fusedDescriptor(numInputs, numOutputs, FusedKernelType::AddMulAdd);
arm_compute::Status status = NeonFusedWorkloadValidate({inputInfos.begin(), inputInfos.end()},
                                                       {outputInfos.begin(), outputInfos.end()},
                                                       fusedDescriptor,
                                                       activationDescriptor);
std::string fusedName;
GetFusedName(layerList, fusedName);

IConnectableLayer* addMulAddLayer =
    optimizationViews.GetINetwork()->AddFusedLayer(fusedDescriptor, fusedName.c_str());

// Attach the folded ReLU as additional info so the workload applies it in-kernel.
if (fuseReLu)
{
    PolymorphicDowncast<FusedLayer*>(addMulAddLayer)->SetAdditionalInfoForObject(
        std::make_shared<ActivationDescriptor>(*activationDescriptor));
}

std::vector<IConnectableLayer*> originalLayers;
for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
{
    if (layerList[layerIdx])
    {
        originalLayers.push_back(layerList[layerIdx]);
    }
}
std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
BuildAddMulAddSlotLists<SlotList>(fuseReLu,
                                  outputInfos.size() > 1,
                                  inputLayersSlotLists,
                                  outputLayersSlotLists);

ReplaceMultipleLayers<FusedLayer>(optimizationViews,
                                  originalLayers,
                                  PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
                                  inputLayersSlotLists,
                                  outputLayersSlotLists);

// The replaced layers drop out of the untouched map.
for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
{
    if (layerList[layerIdx])
    {
        untouched.erase(layerList[layerIdx]->GetGuid());
    }
}
return optimizationViews;
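Between the optimization loop and this return, the function decides whether anything was actually rewritten. A hedged sketch of that tail, assuming the OptimizationViews accessors GetSubstitutions, GetDeletedSubgraphs and AddUntouchedSubgraph:

    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        // Nothing was fused or deleted: hand the whole subgraph back untouched.
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        // Otherwise report the leftover layers so the optimizer can re-schedule them.
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;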
auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                         BaseMemoryManager::MemoryAffinity::Offset);

auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
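Unlike the CreateWorkloadFactory overloads, RegisterTensorHandleFactories only populates the registry and returns nothing. A sketch of the full override under the same assumptions as above:

    void NeonBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
    {
        auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                                 BaseMemoryManager::MemoryAffinity::Offset);
        registry.RegisterMemoryManager(memoryManager);

        auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
        registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
        registry.RegisterFactory(std::move(factory));
    }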
// NeonBackend::GetDefaultAllocator() returns ArmNN's DefaultAllocator.
return std::make_unique<DefaultAllocator>();