36 #include <arm_compute/core/Types.h> 37 #include <arm_compute/runtime/Allocator.h> 50 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
57 return std::make_unique<NeonWorkloadFactory>(
58 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
64 return std::make_unique<NeonWorkloadFactory>(
71 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
75 tensorHandleFactoryRegistry.
RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
77 return std::make_unique<NeonWorkloadFactory>(
78 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
84 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
88 tensorHandleFactoryRegistry.
RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
90 return std::make_unique<NeonWorkloadFactory>(
148 auto it = subgraph.
end();
149 std::map<LayerGuid, Layer*> untouched;
151 while (it != subgraph.
begin())
155 untouched.insert({base.
GetGuid(), &base});
159 while (it != subgraph.
begin())
172 if (output->GetNumConnections() == 1)
174 for (
auto&& childInput : output->GetConnections())
177 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
179 Layer& child = childInput->GetOwningLayer();
181 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
183 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
197 biases = baseLayer->
m_Bias->GetTensorInfo();
202 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
204 baseLayer->
m_Weight->GetTensorInfo(),
211 FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
216 untouched.erase(baseLayer->GetGuid());
217 untouched.erase(activationLayer->GetGuid());
223 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
229 biases = baseLayer->
m_Bias->GetTensorInfo();
234 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
236 baseLayer->
m_Weight->GetTensorInfo(),
242 FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
247 untouched.erase(baseLayer->GetGuid());
248 untouched.erase(activationLayer->GetGuid());
257 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
258 baseLayer->
m_Weight->GetTensorInfo(),
259 baseLayer->
m_Bias->GetTensorInfo(),
265 FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
270 untouched.erase(baseLayer->GetGuid());
271 untouched.erase(activationLayer->GetGuid());
277 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
281 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
282 baseLayer->
m_Mean->GetTensorInfo(),
284 baseLayer->
m_Beta->GetTensorInfo(),
285 baseLayer->
m_Gamma->GetTensorInfo(),
292 FuseLayerWithParameters<BatchNormalizationLayer>(
299 replacementLayer->
m_Beta = std::move(baseLayer->m_Beta);
300 replacementLayer->
m_Gamma = std::move(baseLayer->m_Gamma);
301 replacementLayer->
m_Mean = std::move(baseLayer->m_Mean);
302 replacementLayer->
m_Variance = std::move(baseLayer->m_Variance);
303 untouched.erase(baseLayer->GetGuid());
304 untouched.erase(activationLayer->GetGuid());
309 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
314 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
319 FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
324 untouched.erase(baseLayer->GetGuid());
325 untouched.erase(activationLayer->GetGuid());
330 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
335 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
340 FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
345 untouched.erase(baseLayer->GetGuid());
346 untouched.erase(activationLayer->GetGuid());
356 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
361 FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
366 untouched.erase(baseLayer->GetGuid());
367 untouched.erase(activationLayer->GetGuid());
372 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
377 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
382 FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
387 untouched.erase(baseLayer->GetGuid());
388 untouched.erase(activationLayer->GetGuid());
407 return optimizationViews;
417 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
421 registry.
RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
std::vector< OptimizationPtr > Optimizations
bool HasCapability(BackendCapability capabilityClass) const override
Returns true if the backend supports the capability, false otherwise.
const Parameters & GetParameters() const
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
This layer represents a depthwise convolution 2d operation.
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph) const override
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
constexpr const char * NeonBackendId()
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Copyright (c) 2021 ARM Limited and Contributors.
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
The SubgraphView class represents a subgraph of a Graph.
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::unique_ptr< armnn::profiling::IBackendProfiling > IBackendProfilingPtr
This layer represents a fully connected operation.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
BackendCapability
BackendCapability class.
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
An ActivationDescriptor for the ActivationLayer.
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
This layer represents a subtraction operation.
IBackendInternal::Optimizations GetOptimizations() const override
std::vector< OutputSlot >::iterator BeginOutputSlots()
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
static const BackendId & GetIdStatic()
static const FactoryId & GetIdStatic()
const char * GetName() const override
Returns the name of the layer.
This layer represents a convolution 2d operation.
const std::set< armnn::BackendCapability > cpuAccCapabilities
This layer represents a multiplication operation.
const TensorInfo & GetTensorInfo() const override
std::shared_ptr< armnn::profiling::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace...
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< T > GetAdditionalInformation() const
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
LayerGuid GetGuid() const final
Returns the unique id of the layer.
std::unique_ptr< IBackendContext > IBackendContextPtr