Namespaces
	experimental

	optimizations

	profiling

	stringUtils

	timelinedecoder

	utility

Classes
struct	abs

class	AbsLayer

struct	AbsQueueDescriptor

struct	ActivationDescriptor
	An ActivationDescriptor for the ActivationLayer. More...

class	ActivationLayer
	This layer represents an activation operation with the specified activation function. More...

struct	ActivationQueueDescriptor

class	AddedLayerObservable

class	AdditionLayer
	This layer represents an addition operation. More...

struct	AdditionQueueDescriptor

struct	Allocator

struct	ArgMinMaxDescriptor
	An ArgMinMaxDescriptor for ArgMinMaxLayer. More...

class	ArgMinMaxLayer
	This layer represents an ArgMinMax operation. More...

struct	ArgMinMaxQueueDescriptor

class	ArmNNProfilingServiceInitialiser

class	BackendCapabilityException

class	BackendId

struct	BackendOptions
	Struct for the users to pass backend specific options. More...

class	BackendRegistry

struct	BackendSettings

class	BackendUnavailableException
	Class for non-fatal exceptions raised while initialising a backend. More...

struct	BackendVersion

class	BadOptionalAccessException

struct	BaseDescriptor
	Base class for all descriptors. More...

class	BaseIterator

class	BaseMemoryManager

class	BaseTensor

class	BaseWorkload

class	BatchMatMul

struct	BatchMatMulDescriptor
	A BatchMatMulDescriptor for the BatchMatMul operator. More...

class	BatchMatMulLayer

struct	BatchMatMulQueueDescriptor

struct	BatchNormalizationDescriptor
	A BatchNormalizationDescriptor for the BatchNormalizationLayer. More...

class	BatchNormalizationLayer
	This layer represents a batch normalization operation. More...

struct	BatchNormalizationQueueDescriptor

struct	BatchToSpaceNdDescriptor
	A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer. More...

class	BatchToSpaceNdLayer
	This layer represents a BatchToSpaceNd operation. More...

struct	BatchToSpaceNdQueueDescriptor

class	BFloat16

struct	BiasAndWeightsTypesCompatible

struct	BiasAndWeightsTypesMatch

class	BindableLayer

class	BooleanDecoder

class	BooleanDecoderBool

class	BooleanEncoder

struct	BroadcastLoop

struct	BroadcastToDescriptor

class	BroadcastToLayer

struct	BroadcastToQueueDescriptor

struct	BufferStorage

struct	Capability
	Capability of the TensorHandleFactory. More...

class	CastLayer
	This layer represents a cast operation. More...

struct	CastQueueDescriptor

struct	ceil

struct	ChannelShuffleDescriptor
	A ChannelShuffleDescriptor for the ChannelShuffle operator. More...

class	ChannelShuffleLayer

struct	ChannelShuffleQueueDescriptor

struct	CheckLocation

class	ClAbsWorkload

class	ClActivationWorkload

class	ClAdditionWorkload

class	ClArgMinMaxWorkload

class	ClBackend

class	ClBackendContext

class	ClBackendDefaultAllocator
	Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource). More...

class	ClBackendModelContext
	The ClBackendModelContext is used to pass in CL specific backend ModelOptions. More...

class	ClBaseWorkload

class	ClBatchMatMulWorkload

class	ClBatchNormalizationFloatWorkload

class	ClBatchToSpaceNdWorkload

class	ClCastWorkload

class	ClChannelShuffleWorkload

class	ClComparisonWorkload

class	ClConcatWorkload

class	ClConstantWorkload

struct	ClContextBuilder

class	ClContextControl

class	ClContextDeserializer

class	ClContextSerializer

class	ClConvertFp16ToFp32Workload

class	ClConvertFp32ToFp16Workload

class	ClConvolution2dWorkload

class	ClConvolution3dWorkload

class	ClDepthToSpaceWorkload

class	ClDepthwiseConvolutionWorkload

class	ClDequantizeWorkload

class	ClDivisionWorkload

class	ClElementwiseBinaryWorkload

class	ClExpWorkload

class	ClFillWorkload

class	ClFloorFloatWorkload

class	ClFullyConnectedWorkload

class	ClGatherNdWorkload

class	ClGatherWorkload

class	ClImportSubTensorHandle

class	ClImportTensorHandle

class	ClImportTensorHandleFactory
	This factory creates ClImportTensorHandles that refer to imported memory tensors. More...

class	ClInstanceNormalizationWorkload

class	ClL2NormalizationFloatWorkload

class	ClLayerSupport

class	ClLogicalAndWorkload

class	ClLogicalNotWorkload

class	ClLogicalOrWorkload

class	ClLogSoftmaxWorkload

class	ClLogWorkload

class	ClLstmFloatWorkload

class	ClMaximumWorkload

class	ClMeanWorkload

class	ClMemoryManager

class	ClMinimumWorkload

class	ClMultiplicationWorkload

class	ClNegWorkload

class	ClNormalizationFloatWorkload

class	ClPadWorkload

class	ClPermuteWorkload

class	ClPooling2dWorkload

class	ClPooling3dWorkload

class	ClPreluWorkload

class	ClQLstmWorkload

class	ClQuantizedLstmWorkload

class	ClQuantizeWorkload

struct	ClRankWorkload

class	ClReduceWorkload

class	ClReshapeWorkload

class	ClResizeWorkload

class	ClReverseV2Workload

class	ClRsqrtWorkload

class	ClRuntimeUnavailableException

class	ClSinWorkload

class	ClSliceWorkload

class	ClSoftmaxWorkload

class	ClSpaceToBatchNdWorkload

class	ClSpaceToDepthWorkload

class	ClSplitterWorkload

class	ClSqrtWorkload

class	ClStackWorkload

class	ClStridedSliceWorkload

class	ClSubTensorHandle

class	ClSubtractionWorkload

class	ClTensorDecorator
	ClTensorDecorator wraps an existing CL tensor allowing us to override the TensorInfo for it. More...

class	ClTensorHandle

class	ClTensorHandleDecorator

class	ClTensorHandleFactory

class	ClTileWorkload

class	ClTransposeConvolution2dWorkload

class	ClTransposeWorkload

class	ClTunedParameters

class	ClUnidirectionalSequenceLstmFloatWorkload

class	ClWorkloadFactory

struct	ComparisonDescriptor
	A ComparisonDescriptor for the ComparisonLayer. More...

class	ComparisonLayer
	This layer represents a comparison operation. More...

struct	ComparisonQueueDescriptor

class	ConcatLayer
	This layer represents a concatenation (merge) operation. More...

struct	ConcatQueueDescriptor

class	ConstantLayer
	A layer that the constant data can be bound to. More...

class	ConstantMemoryStrategy

struct	ConstantQueueDescriptor

class	ConstPassthroughTensorHandle

struct	ConstructInPlace
	Disambiguation tag that can be passed to the constructor to indicate that the contained object should be constructed in-place. More...

class	ConstTensor
	A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. More...

class	ConstTensorHandle

class	ConvertFp16ToFp32Layer
	This layer converts data type Float 16 to Float 32. More...

struct	ConvertFp16ToFp32QueueDescriptor

class	ConvertFp32ToFp16Layer
	This layer converts data type Float 32 to Float 16. More...

struct	ConvertFp32ToFp16QueueDescriptor

struct	Convolution2dDescriptor
	A Convolution2dDescriptor for the Convolution2dLayer. More...

class	Convolution2dLayer
	This layer represents a convolution 2d operation. More...

struct	Convolution2dQueueDescriptor

struct	Convolution3dDescriptor
	A Convolution3dDescriptor for the Convolution3dLayer. More...

class	Convolution3dLayer
	This layer represents a convolution 3d operation. More...

struct	Convolution3dQueueDescriptor

class	CopyMemGenericWorkload

class	DebugLayer
	This layer visualizes the data flowing through the network. More...

struct	DebugQueueDescriptor

class	Decoder

class	DefaultAllocator
	Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource). More...

class	DepthToSpaceLayer
	This layer represents a DepthToSpace operation. More...

struct	DepthToSpaceQueueDescriptor

struct	DepthwiseConvolution2dDescriptor
	A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer. More...

class	DepthwiseConvolution2dLayer
	This layer represents a depthwise convolution 2d operation. More...

struct	DepthwiseConvolution2dQueueDescriptor
	Depthwise Convolution 2D layer workload data. More...

class	DequantizeLayer
	This layer dequantizes the input tensor. More...

struct	DequantizeQueueDescriptor

struct	DetectionPostProcessDescriptor

class	DetectionPostProcessLayer
	This layer represents a detection postprocess operator. More...

struct	DetectionPostProcessQueueDescriptor

class	DeviceSpec

class	DivisionLayer
	This layer represents a division operation. More...

struct	DivisionQueueDescriptor

class	DotAttributeSet

class	DotBase

class	DotDefaults

class	DotEdge

class	DotGraph

class	DotNode

class	DynamicBackend

class	DynamicBackendUtils

class	ElementwiseBaseLayer
	NOTE: this is an abstract class that encapsulates the element-wise operations; it does not implement std::unique_ptr<IWorkload> Layer::CreateWorkload(const IWorkloadFactory& factory) const = 0 or Layer* Clone(Graph& graph) const = 0. More...

struct	ElementwiseBinaryDescriptor
	An ElementwiseBinaryDescriptor for the ElementwiseBinaryLayer. More...

struct	ElementwiseBinaryFunction

class	ElementwiseBinaryLayer
	This layer represents an elementwiseBinary operation. More...

struct	ElementwiseBinaryQueueDescriptor

struct	ElementwiseUnaryDescriptor
	An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. More...

struct	ElementwiseUnaryFunction

class	ElementwiseUnaryLayer
	This layer represents an elementwiseUnary operation. More...

struct	ElementwiseUnaryQueueDescriptor

struct	EmptyOptional
	EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional in a function declaration. More...

class	Encoder

struct	EqualQueueDescriptor

class	ErasedLayerNamesObservable

class	Event
	Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Event::GetMeasurements() is called. More...

class	Exception
	Base class for all ArmNN exceptions so that users can filter to just those. More...

class	ExecutionFrame

struct	exp

struct	FakeQuantizationDescriptor
	A FakeQuantizationDescriptor for the FakeQuantizationLayer. More...

class	FakeQuantizationLayer
	This layer represents a fake quantization operation. More...

struct	FakeQuantizationQueueDescriptor

class	FileNotFoundException

struct	FillDescriptor
	A FillDescriptor for the FillLayer. More...

class	FillLayer
	This layer represents a fill operation. More...

struct	FillQueueDescriptor

class	FirstInputTypedWorkload

struct	FLATBUFFERS_FINAL_CLASS

class	Float16Decoder

class	Float16Encoder

class	Float32Decoder

class	Float32Encoder

class	FloorLayer
	This layer represents a floor operation. More...

struct	FloorQueueDescriptor

struct	FullyConnectedDescriptor
	A FullyConnectedDescriptor for the FullyConnectedLayer. More...

class	FullyConnectedLayer
	This layer represents a fully connected operation. More...

struct	FullyConnectedQueueDescriptor

struct	FusedDescriptor
	A FusedDescriptor for the FusedLayer. More...

class	FusedLayer

struct	FusedQueueDescriptor

struct	GatherDescriptor
	A GatherDescriptor for the GatherLayer. More...

class	GatherLayer
	This layer represents a Gather operator. More...

class	GatherNdLayer
	This layer represents a GatherNd operator. More...

struct	GatherNdQueueDescriptor

struct	GatherQueueDescriptor

class	GpuFsaBackend

class	GpuFsaBackendContext

class	GpuFsaBackendDefaultAllocator
	Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource). More...

class	GpuFsaBaseWorkload

class	GpuFsaConstantWorkload

class	GpuFsaContextControl

class	GpuFsaLayerSupport

class	GpuFsaMemoryManager

struct	GpuFsaPreCompiledBlob
	A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend. More...

class	GpuFsaPreCompiledWorkload

class	GpuFsaSubTensorHandle

class	GpuFsaTensorHandle

class	GpuFsaTensorHandleFactory

class	GpuFsaWorkloadFactory

class	Graph

class	GraphObservable

class	GraphValidationException

struct	GreaterQueueDescriptor

class	HtmlBold

class	HtmlFont

class	HtmlSection

class	HtmlSimpleTag

class	IAclTensorHandle

class	IBackend
	Each backend should implement an IBackend. More...

class	IBackendContext

class	IBackendInternal

class	IBackendModelContext

class	IClTensorHandle

class	ICLTensorProxy

class	IConnectableLayer
	Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. More...

class	ICustomAllocator
	Custom Allocator interface. More...

class	IDeviceSpec
	Device specific knowledge to be passed to the optimizer. More...

class	IExecutionFrame
	ExecutionFrame interface to enqueue a workload computation. More...

class	IGpuAccTunedParameters
	Manages a set of GpuAcc parameters which have been tuned for maximum performance. More...

class	IGraphObservable

class	IInputSlot
	An input connection slot for a layer. More...

class	ILayerSupport

class	IMemoryManager

class	IMemoryOptimizerStrategy

struct	IMemoryOptimizerStrategyFactory

class	ImportMemGenericWorkload

class	INetwork
	Main network class which provides the interface for building up a neural network. More...

struct	INetworkProperties

class	InputLayer
	A layer user-provided data can be bound to (e.g. inputs, outputs). More...

class	InputSlot

struct	InstanceNormalizationDescriptor
	An InstanceNormalizationDescriptor for InstanceNormalizationLayer. More...

class	InstanceNormalizationLayer
	This layer represents an instance normalization operation. More...

struct	InstanceNormalizationQueueDescriptor

class	Instrument

class	Int32Decoder

class	Int32Encoder

class	Int32ToInt32tDecoder

class	Int32ToInt32tEncoder

class	Int64Decoder

class	Int64Encoder

class	InvalidArgumentException

class	IOptimizedNetwork

class	IOutputSlot
	An output connection slot for a layer. More...

class	IProfiler

class	IRuntime

struct	IsHalfType

struct	IsMemorySource

struct	IsMemorySource< MemorySource >

class	IStrategy

class	ISubgraphViewConverter

class	ITensorHandle

class	ITensorHandleFactory

class	IWorkload
	Workload interface to enqueue a layer computation. More...

class	IWorkloadFactory

struct	JsonChildObject

class	JsonPrinter

class	JsonUtils

struct	L2NormalizationDescriptor
	An L2NormalizationDescriptor for the L2NormalizationLayer. More...

class	L2NormalizationLayer
	This layer represents an L2 normalization operation. More...

struct	L2NormalizationQueueDescriptor

class	Layer

class	LayerSupportBase

class	LayerSupportHandle

struct	LayerTypeOfImpl

struct	LayerTypeOfImpl< LayerType::Activation >

struct	LayerTypeOfImpl< LayerType::Addition >

struct	LayerTypeOfImpl< LayerType::ArgMinMax >

struct	LayerTypeOfImpl< LayerType::BatchMatMul >

struct	LayerTypeOfImpl< LayerType::BatchNormalization >

struct	LayerTypeOfImpl< LayerType::BatchToSpaceNd >

struct	LayerTypeOfImpl< LayerType::BroadcastTo >

struct	LayerTypeOfImpl< LayerType::Cast >

struct	LayerTypeOfImpl< LayerType::ChannelShuffle >

struct	LayerTypeOfImpl< LayerType::Comparison >

struct	LayerTypeOfImpl< LayerType::Concat >

struct	LayerTypeOfImpl< LayerType::Constant >

struct	LayerTypeOfImpl< LayerType::ConvertFp16ToFp32 >

struct	LayerTypeOfImpl< LayerType::ConvertFp32ToFp16 >

struct	LayerTypeOfImpl< LayerType::Convolution2d >

struct	LayerTypeOfImpl< LayerType::Convolution3d >

struct	LayerTypeOfImpl< LayerType::Debug >

struct	LayerTypeOfImpl< LayerType::DepthToSpace >

struct	LayerTypeOfImpl< LayerType::DepthwiseConvolution2d >

struct	LayerTypeOfImpl< LayerType::Dequantize >

struct	LayerTypeOfImpl< LayerType::DetectionPostProcess >

struct	LayerTypeOfImpl< LayerType::Division >

struct	LayerTypeOfImpl< LayerType::ElementwiseBinary >

struct	LayerTypeOfImpl< LayerType::ElementwiseUnary >

struct	LayerTypeOfImpl< LayerType::FakeQuantization >

struct	LayerTypeOfImpl< LayerType::Fill >

struct	LayerTypeOfImpl< LayerType::Floor >

struct	LayerTypeOfImpl< LayerType::FullyConnected >

struct	LayerTypeOfImpl< LayerType::Fused >

struct	LayerTypeOfImpl< LayerType::Gather >

struct	LayerTypeOfImpl< LayerType::GatherNd >

struct	LayerTypeOfImpl< LayerType::Input >

struct	LayerTypeOfImpl< LayerType::InstanceNormalization >

struct	LayerTypeOfImpl< LayerType::L2Normalization >

struct	LayerTypeOfImpl< LayerType::LogicalBinary >

struct	LayerTypeOfImpl< LayerType::LogSoftmax >

struct	LayerTypeOfImpl< LayerType::Lstm >

struct	LayerTypeOfImpl< LayerType::Map >

struct	LayerTypeOfImpl< LayerType::Maximum >

struct	LayerTypeOfImpl< LayerType::Mean >

struct	LayerTypeOfImpl< LayerType::MemCopy >

struct	LayerTypeOfImpl< LayerType::MemImport >

struct	LayerTypeOfImpl< LayerType::Merge >

struct	LayerTypeOfImpl< LayerType::Minimum >

struct	LayerTypeOfImpl< LayerType::Multiplication >

struct	LayerTypeOfImpl< LayerType::Normalization >

struct	LayerTypeOfImpl< LayerType::Output >

struct	LayerTypeOfImpl< LayerType::Pad >

struct	LayerTypeOfImpl< LayerType::Permute >

struct	LayerTypeOfImpl< LayerType::Pooling2d >

struct	LayerTypeOfImpl< LayerType::Pooling3d >

struct	LayerTypeOfImpl< LayerType::PreCompiled >

struct	LayerTypeOfImpl< LayerType::Prelu >

struct	LayerTypeOfImpl< LayerType::QLstm >

struct	LayerTypeOfImpl< LayerType::Quantize >

struct	LayerTypeOfImpl< LayerType::QuantizedLstm >

struct	LayerTypeOfImpl< LayerType::Rank >

struct	LayerTypeOfImpl< LayerType::Reduce >

struct	LayerTypeOfImpl< LayerType::Reshape >

struct	LayerTypeOfImpl< LayerType::Resize >

struct	LayerTypeOfImpl< LayerType::ReverseV2 >

struct	LayerTypeOfImpl< LayerType::Shape >

struct	LayerTypeOfImpl< LayerType::Slice >

struct	LayerTypeOfImpl< LayerType::Softmax >

struct	LayerTypeOfImpl< LayerType::SpaceToBatchNd >

struct	LayerTypeOfImpl< LayerType::SpaceToDepth >

struct	LayerTypeOfImpl< LayerType::Splitter >

struct	LayerTypeOfImpl< LayerType::Stack >

struct	LayerTypeOfImpl< LayerType::StandIn >

struct	LayerTypeOfImpl< LayerType::StridedSlice >

struct	LayerTypeOfImpl< LayerType::Subtraction >

struct	LayerTypeOfImpl< LayerType::Switch >

struct	LayerTypeOfImpl< LayerType::Tile >

struct	LayerTypeOfImpl< LayerType::Transpose >

struct	LayerTypeOfImpl< LayerType::TransposeConvolution2d >

struct	LayerTypeOfImpl< LayerType::UnidirectionalSequenceLstm >

struct	LayerTypeOfImpl< LayerType::Unmap >

class	LayerValidationException

class	LayerWithParameters

class	LoadedNetwork

struct	log

struct	LogicalBinaryDescriptor
	A LogicalBinaryDescriptor for the LogicalBinaryLayer. More...

struct	LogicalBinaryFunction

class	LogicalBinaryLayer
	This layer represents a Logical Binary operation. More...

struct	LogicalBinaryQueueDescriptor

struct	LogicalUnaryFunction

class	LogSink

class	LogSoftmaxLayer
	This layer represents a log softmax operation. More...

struct	LogSoftmaxQueueDescriptor

struct	LstmBasicParameters

struct	LstmDescriptor
	An LstmDescriptor for the LstmLayer. More...

struct	LstmInputParams

struct	LstmInputParamsInfo

class	LstmLayer
	This layer represents an LSTM operation. More...

struct	LstmOptCifgParameters

struct	LstmOptLayerNormParameters

struct	LstmOptPeepholeParameters

struct	LstmOptProjectionParameters

struct	LstmQueueDescriptor

class	ManagedConstTensorHandle

class	MapLayer
	This layer represents a memory map operation. More...

struct	MapQueueDescriptor

class	MapWorkload

struct	maximum

class	MaximumLayer
	This layer represents a maximum operation. More...

struct	MaximumQueueDescriptor

struct	MeanDescriptor
	A MeanDescriptor for the MeanLayer. More...

class	MeanLayer
	This layer represents a mean operation. More...

struct	MeanQueueDescriptor

struct	Measurement

struct	MemBin

struct	MemBlock

class	MemCopyLayer
	This layer represents a memory copy operation. More...

struct	MemCopyQueueDescriptor

class	MemImportLayer
	This layer represents a memory import operation. More...

struct	MemImportQueueDescriptor

class	MemoryExportException

class	MemoryImportException

struct	MemoryInfo

class	MemoryManager

struct	MemoryRequirements

class	MemoryValidationException

struct	MemSyncQueueDescriptor

class	MergeLayer
	This layer represents a merge operation. More...

struct	MergeQueueDescriptor

struct	minimum

class	MinimumLayer
	This layer represents a minimum operation. More...

struct	MinimumQueueDescriptor

class	MockTensorHandleFactory

class	MultiplicationLayer
	This layer represents a multiplication operation. More...

struct	MultiplicationQueueDescriptor

class	MultiTypedWorkload

class	NeonAbsWorkload

class	NeonActivationWorkload

class	NeonAdditionWorkload

class	NeonArgMinMaxWorkload

class	NeonBackend

class	NeonBackendModelContext
	The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions. More...

class	NeonBaseWorkload

class	NeonBatchMatMulWorkload

class	NeonBatchNormalizationWorkload

class	NeonBatchToSpaceNdWorkload

class	NeonCastWorkload

class	NeonChannelShuffleWorkload

class	NeonComparisonWorkload

class	NeonConcatWorkload

class	NeonConstantWorkload

class	NeonConvertFp16ToFp32Workload

class	NeonConvertFp32ToFp16Workload

class	NeonConvolution2dWorkload

class	NeonConvolution3dWorkload

class	NeonDepthToSpaceWorkload

class	NeonDepthwiseConvolutionWorkload

class	NeonDequantizeWorkload

class	NeonDetectionPostProcessWorkload

class	NeonDivisionWorkload

class	NeonElementwiseBinaryWorkload

class	NeonExpWorkload

class	NeonFillWorkload

class	NeonFloorFloatWorkload

class	NeonFullyConnectedWorkload

class	NeonFusedWorkload

class	NeonGatherNdWorkload

class	NeonGatherWorkload

class	NeonInstanceNormalizationWorkload

class	NeonInterceptorScheduler

class	NeonL2NormalizationFloatWorkload

class	NeonLayerSupport

class	NeonLogicalAndWorkload

class	NeonLogicalNotWorkload

class	NeonLogicalOrWorkload

class	NeonLogSoftmaxWorkload

class	NeonLogWorkload

class	NeonLstmFloatWorkload

class	NeonMaximumWorkload

class	NeonMeanWorkload

class	NeonMemoryManager

class	NeonMinimumWorkload

class	NeonMultiplicationWorkload

class	NeonNegWorkload

class	NeonNormalizationFloatWorkload

class	NeonPadWorkload

class	NeonPermuteWorkload

class	NeonPooling2dWorkload

class	NeonPooling3dWorkload

class	NeonPreluWorkload

class	NeonQLstmWorkload

class	NeonQuantizedLstmWorkload

class	NeonQuantizeWorkload

struct	NeonRankWorkload

class	NeonReduceWorkload

class	NeonReshapeWorkload

class	NeonResizeWorkload

class	NeonReverseV2Workload

class	NeonRsqrtWorkload

class	NeonSinWorkload

class	NeonSliceWorkload

class	NeonSoftmaxWorkload

class	NeonSpaceToBatchNdWorkload

class	NeonSpaceToDepthWorkload

class	NeonSplitterWorkload

class	NeonSqrtWorkload

class	NeonStackWorkload

class	NeonStridedSliceWorkload

class	NeonSubTensorHandle

class	NeonSubtractionWorkload

class	NeonTensorDecorator
	NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it. More...

class	NeonTensorHandle

class	NeonTensorHandleDecorator

class	NeonTensorHandleFactory

class	NeonTileWorkload

class	NeonTimer

class	NeonTransposeConvolution2dWorkload

class	NeonTransposeWorkload

class	NeonUnidirectionalSequenceLstmFloatWorkload

class	NeonUnidirectionalSequenceLstmWorkload

class	NeonWorkloadFactory

class	NetworkImpl
	Private implementation of INetwork. More...

class	NodeContent

struct	NormalizationDescriptor
	A NormalizationDescriptor for the NormalizationLayer. More...

class	NormalizationLayer
	This layer represents a normalization operation. More...

struct	NormalizationQueueDescriptor

struct	NoThrowStrategy

struct	NullDescriptor
	Null Descriptor used as a return value from the IConnectableLayer GetParameters method by layers which do not have a descriptor. More...

class	NullPointerException

class	NullWorkload

class	OpenClTimer
	OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). More...

class	Optimization

struct	OptimizationResult

class	OptimizationViews

class	OptimizedNetworkImpl

class	OptimizeForConnection

class	OptimizeForConnectionImpl
	Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...

class	OptimizeForExclusiveConnection

class	OptimizeForExclusiveConnectionImpl
	Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...

class	OptimizeForType

class	OptimizeForTypeImpl
	Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType. More...

class	OptimizeForTypeImpl< Layer, Wrapped >
	Specialization that calls Wrapped::Run() for any layer type. More...

class	Optimizer

struct	OptimizerOptions

class	OptimizerOptionsOpaque

struct	OptimizerOptionsOpaqueImpl

class	Optional

class	OptionalBase
	OptionalBase is the common functionality between reference and non-reference optional types. More...

class	OptionalReferenceSwitch
	The default implementation is the non-reference case. More...

class	OptionalReferenceSwitch< true, T >
	This is the special case for reference types. More...

struct	OriginsDescriptor
	An OriginsDescriptor for the ConcatLayer. More...

class	OutputHandler

class	OutputLayer
	A layer user-provided data can be bound to (e.g. inputs, outputs). More...

class	OutputSlot

struct	PadDescriptor
	A PadDescriptor for the PadLayer. More...

class	PadLayer
	This layer represents a pad operation. More...

struct	PadQueueDescriptor

class	ParseException

class	PassthroughTensorHandle

class	PerAxisIterator
	PerAxisIterator for per-axis quantization. More...

class	PermutationVector

struct	PermuteDescriptor
	A PermuteDescriptor for the PermuteLayer. More...

class	PermuteLayer
	This layer represents a permutation operation. More...

struct	PermuteQueueDescriptor

class	PolymorphicDowncastException

struct	Pooling2dDescriptor
	A Pooling2dDescriptor for the Pooling2dLayer. More...

class	Pooling2dLayer
	This layer represents a pooling 2d operation. More...

struct	Pooling2dQueueDescriptor

struct	Pooling3dDescriptor
	A Pooling3dDescriptor for the Pooling3dLayer. More...

class	Pooling3dLayer
	This layer represents a pooling 3d operation. More...

struct	Pooling3dQueueDescriptor

struct	power

struct	PreCompiledDescriptor
	A PreCompiledDescriptor for the PreCompiledLayer. More...

class	PreCompiledLayer

struct	PreCompiledQueueDescriptor

class	PreluLayer

struct	PreluQueueDescriptor

class	ProfilerImpl

class	ProfilerManager

class	ProfilingDetails
	ProfilingDetails class records any details associated with the operator and passes on for outputting to the user. More...

struct	ProgramBuilder

class	QASymm8Decoder

class	QASymm8Encoder

class	QASymmS8Decoder

class	QASymmS8Encoder

struct	QLstmBasicParameters

struct	QLstmDescriptor
	A QLstmDescriptor for the QLstmLayer. More...

class	QLstmLayer
	This layer represents a QLstm operation. More...

struct	QLstmOptCifgParameters

struct	QLstmOptLayerNormParameters

struct	QLstmOptPeepholeParameters

struct	QLstmOptProjectionParameters

struct	QLstmQueueDescriptor

class	QSymm16Decoder

class	QSymm16Encoder

class	QSymm16PerAxisEncoder

class	QSymm8PerAxisDecoder

class	QSymm8PerAxisEncoder

class	QSymmS8Decoder

class	QSymmS8Encoder

struct	QuantizationParametersAreEqual

struct	QuantizedLstmInputParams

struct	QuantizedLstmInputParamsInfo

class	QuantizedLstmLayer
	This layer represents a QuantizedLstm operation. More...

struct	QuantizedLstmParameters

struct	QuantizedLstmQueueDescriptor

struct	QuantizedMultiplierSmallerThanOne
	Performs multiplication of an integer with a multiplier which is less than one, using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. More...

class	QuantizeLayer

struct	QuantizeQueueDescriptor

struct	QueueDescriptor

struct	QueueDescriptorWithParameters

class	RangeTracker

class	RankLayer

struct	RankQueueDescriptor

struct	ReduceDescriptor
	A ReduceDescriptor for the REDUCE operators. More...

class	ReduceLayer
	This layer represents a reduction operation. More...

struct	ReduceQueueDescriptor

class	RefActivationWorkload

class	RefArgMinMaxWorkload

class	RefBackend

class	RefBaseWorkload

class	RefBatchMatMulWorkload

class	RefBatchNormalizationWorkload

class	RefBatchToSpaceNdWorkload

class	RefBroadcastToWorkload

class	RefCastWorkload

class	RefChannelShuffleWorkload

class	RefComparisonWorkload

class	RefConcatWorkload

class	RefConstantWorkload

class	RefConvertFp16ToFp32Workload

class	RefConvertFp32ToFp16Workload

class	RefConvolution2dWorkload

class	RefConvolution3dWorkload

class	RefDebugWorkload

class	RefDepthToSpaceWorkload

class	RefDepthwiseConvolution2dWorkload

class	RefDequantizeWorkload

class	RefDetectionPostProcessWorkload

class	RefElementwiseBinaryWorkload

class	RefElementwiseUnaryWorkload

class	RefElementwiseWorkload

class	RefFakeQuantizationFloat32Workload

class	RefFillWorkload

class	RefFloorWorkload

class	RefFullyConnectedWorkload

class	RefGatherNdWorkload

class	RefGatherWorkload

class	RefInstanceNormalizationWorkload

class	RefL2NormalizationWorkload

class	RefLayerSupport

class	RefLogicalBinaryWorkload

class	RefLogicalUnaryWorkload

class	RefLogSoftmaxWorkload

class	RefLstmWorkload

class	RefMeanWorkload

class	RefMemoryManager

class	RefNormalizationWorkload

class	RefPadWorkload

class	RefPermuteWorkload

class	RefPooling2dWorkload

class	RefPooling3dWorkload

class	RefPreluWorkload

class	RefQLstmWorkload

class	RefQuantizeWorkload

struct	RefRankWorkload

class	RefReduceWorkload

class	RefReshapeWorkload

class	RefResizeWorkload

class	RefReverseV2Workload

struct	RefShapeWorkload

class	RefSliceWorkload

class	RefSoftmaxWorkload

class	RefSpaceToBatchNdWorkload

class	RefSpaceToDepthWorkload

class	RefSplitterWorkload

class	RefStackWorkload

class	RefStridedSliceWorkload

class	RefTensorHandle

class	RefTensorHandleDecorator

class	RefTensorHandleFactory

class	RefTileWorkload

class	RefTransposeConvolution2dWorkload

class	RefTransposeWorkload

class	RefUnidirectionalSequenceLstmWorkload

class	RefWorkloadFactory

struct	ReshapeDescriptor
	A ReshapeDescriptor for the ReshapeLayer. More...

class	ReshapeLayer
	This layer represents a reshape operation. More...

struct	ReshapeQueueDescriptor

struct	ResizeDescriptor
	A ResizeDescriptor for the ResizeLayer. More...

class	ResizeLayer
	This layer represents a resize operation. More...

struct	ResizeQueueDescriptor

struct	ResolveTypeImpl

struct	ResolveTypeImpl< DataType::BFloat16 >

struct	ResolveTypeImpl< DataType::Boolean >

struct	ResolveTypeImpl< DataType::Float16 >

struct	ResolveTypeImpl< DataType::Float32 >

struct	ResolveTypeImpl< DataType::QAsymmS8 >

struct	ResolveTypeImpl< DataType::QAsymmU8 >

struct	ResolveTypeImpl< DataType::QSymmS16 >

struct	ResolveTypeImpl< DataType::QSymmS8 >

struct	ResolveTypeImpl< DataType::Signed32 >

struct	ResolveTypeImpl< DataType::Signed64 >

class	ReverseV2Layer
	This layer represents a ReverseV2 operation. More...

struct	ReverseV2QueueDescriptor

struct	rsqrt

class	RsqrtLayer

struct	RsqrtQueueDescriptor

struct	Rule

class	RuntimeException

struct	RuntimeImpl

class	ScaledInt32Decoder

class	ScaledInt32PerAxisDecoder

class	ScopedProfilingEvent

struct	ScopedRecord

class	ScopedTensorHandle

class	ShapeLayer

struct	ShapeQueueDescriptor

struct	ShapesAreBroadcastCompatible

struct	ShapesAreSameRank

struct	ShapesAreSameTotalSize

class	SimpleLogger

struct	sin

class	SingleAxisPriorityList
	SingleAxisPriorityList sorts the MemBlocks according to some priority, then tries to place them into as few bins as possible. More...

struct	SliceDescriptor
	A SliceDescriptor for the SliceLayer. More...

class	SliceLayer

struct	SliceQueueDescriptor

struct	SoftmaxDescriptor
	A SoftmaxDescriptor for the SoftmaxLayer. More...

class	SoftmaxLayer
	This layer represents a softmax operation. More...

struct	SoftmaxQueueDescriptor

struct	SpaceToBatchNdDescriptor
	A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer. More...

class	SpaceToBatchNdLayer
	This layer represents a SpaceToBatchNd operation. More...

struct	SpaceToBatchNdQueueDescriptor

struct	SpaceToDepthDescriptor
	A SpaceToDepthDescriptor for the SpaceToDepthLayer. More...

class	SpaceToDepthLayer
	This layer represents a SpaceToDepth operation. More...

struct	SpaceToDepthQueueDescriptor

class	SplitterLayer
	This layer represents a split operation. More...

struct	SplitterQueueDescriptor

struct	sqrt

struct	squaredDifference

struct	StackDescriptor
	A StackDescriptor for the StackLayer. More...

class	StackLayer
	This layer represents a stack operation. More...

struct	StackQueueDescriptor

class	StandardOutputSink

struct	StandInDescriptor
	A StandInDescriptor for the StandIn layer. More...

class	StandInLayer
	This layer represents an unknown operation in the input graph. More...

class	StrategyBase
	Strategy base class with empty implementations. More...

struct	StrategyFactory

class	StrategyValidator

struct	StridedSliceDescriptor
	A StridedSliceDescriptor for the StridedSliceLayer. More...

class	StridedSliceLayer
	This layer represents a strided slice operation. More...

struct	StridedSliceQueueDescriptor

struct	StringifyLayerParameters
	StringifyLayerParameters allows serializing layer parameters to string. More...

struct	StringifyLayerParameters< ActivationDescriptor >

struct	StringifyLayerParameters< BatchMatMulDescriptor >

struct	StringifyLayerParameters< BatchNormalizationDescriptor >

struct	StringifyLayerParameters< BatchToSpaceNdDescriptor >

struct	StringifyLayerParameters< ChannelShuffleDescriptor >

struct	StringifyLayerParameters< ComparisonDescriptor >

struct	StringifyLayerParameters< Convolution2dDescriptor >

struct	StringifyLayerParameters< Convolution3dDescriptor >

struct	StringifyLayerParameters< DepthwiseConvolution2dDescriptor >

struct	StringifyLayerParameters< DetectionPostProcessDescriptor >

struct	StringifyLayerParameters< ElementwiseBinaryDescriptor >

struct	StringifyLayerParameters< ElementwiseUnaryDescriptor >

struct	StringifyLayerParameters< FakeQuantizationDescriptor >

struct	StringifyLayerParameters< FullyConnectedDescriptor >

struct	StringifyLayerParameters< FusedDescriptor >

struct	StringifyLayerParameters< GatherDescriptor >

struct	StringifyLayerParameters< L2NormalizationDescriptor >

struct	StringifyLayerParameters< LstmDescriptor >

struct	StringifyLayerParameters< MeanDescriptor >

struct	StringifyLayerParameters< NormalizationDescriptor >

struct	StringifyLayerParameters< OriginsDescriptor >

struct	StringifyLayerParameters< PadDescriptor >

struct	StringifyLayerParameters< PermuteDescriptor >

struct	StringifyLayerParameters< Pooling2dDescriptor >

struct	StringifyLayerParameters< Pooling3dDescriptor >

struct	StringifyLayerParameters< PreCompiledDescriptor >

struct	StringifyLayerParameters< ReduceDescriptor >

struct	StringifyLayerParameters< ReshapeDescriptor >

struct	StringifyLayerParameters< ResizeDescriptor >

struct	StringifyLayerParameters< SoftmaxDescriptor >

struct	StringifyLayerParameters< SpaceToBatchNdDescriptor >

struct	StringifyLayerParameters< SpaceToDepthDescriptor >

struct	StringifyLayerParameters< StackDescriptor >

struct	StringifyLayerParameters< StridedSliceDescriptor >

struct	StringifyLayerParameters< TileDescriptor >

struct	StringifyLayerParameters< TransposeConvolution2dDescriptor >

struct	StringifyLayerParameters< TransposeDescriptor >

struct	StringifyLayerParameters< ViewsDescriptor >

struct	StringMapping
	StringMapping is a helper class to be able to use strings as template parameters, so this allows simplifying code which only differs in a string, such as a debug string literal. More...

class	SubgraphView
	The SubgraphView class represents a subgraph of a Graph. More...

class	SubgraphViewSelector
	Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. More...

class	SubtractionLayer
	This layer represents a subtraction operation. More...

struct	SubtractionQueueDescriptor

class	SwitchLayer
	This layer calculates both true and false outputs for input. More...

struct	SwitchQueueDescriptor

class	SyncMemGenericWorkload

class	Tensor
	A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. More...

class	TensorBufferArrayView

class	TensorHandle

class	TensorHandleFactoryRegistry

class	TensorInfo

struct	TensorMemory

struct	TensorNumDimensionsAreCorrect

struct	TensorNumDimensionsAreGreaterOrEqualTo

class	TensorShape

struct	ThrowingStrategy

struct	TileDescriptor

class	TileLayer

struct	TileQueueDescriptor

class	TimeoutException

class	TosaRefBackend

class	TosaRefBaseWorkload

class	TosaRefLayerSupport

class	TosaRefMemoryManager

class	TosaRefPreCompiledWorkload

class	TosaRefTensorHandle

class	TosaRefTensorHandleFactory

class	TosaRefWorkloadFactory

class	TransformIterator

struct	TransposeConvolution2dDescriptor
	A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer. More...

class	TransposeConvolution2dLayer
	This layer represents a 2D transpose convolution operation. More...

struct	TransposeConvolution2dQueueDescriptor

struct	TransposeDescriptor
	A TransposeDescriptor for the TransposeLayer. More...

class	TransposeLayer
	This layer represents a transpose operation. More...

struct	TransposeQueueDescriptor

struct	TypeAnyOf

class	TypedIterator

class	TypedWorkload

struct	TypeIs

struct	TypeNotPerAxisQuantized

struct	TypesAreEqual

class	UnidirectionalSequenceLstmLayer
	This layer represents an LSTM operation. More...

struct	UnidirectionalSequenceLstmQueueDescriptor

class	UnimplementedException

class	UnmapLayer
	This layer represents a memory copy operation. More...

struct	UnmapQueueDescriptor

class	UnmapWorkload

struct	ViewsDescriptor
	A ViewsDescriptor for the SplitterLayer. More...

class	WallClockTimer

class	WorkloadDataCollector

class	WorkloadFactoryBase

struct	WorkloadInfo
	Contains information about TensorInfos of a layer. More...

Typedefs
using	BackendIdVector = std::vector< BackendId >

using	BackendIdSet = std::unordered_set< BackendId >

using	NetworkOptions = std::vector< BackendOptions >

using	ModelOptions = std::vector< BackendOptions >

using	BackendCapabilities = BackendOptions

using	IBackendInternalUniquePtr = std::unique_ptr< IBackendInternal >

using	MemoryOptimizerStrategiesMapRef = std::unordered_map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > >

using	DynamicBackendPtr = std::unique_ptr< DynamicBackend >

using	IBackendContextUniquePtr = std::unique_ptr< IBackendContext >

using	ILayerSupportSharedPtr = std::shared_ptr< ILayerSupport >

using	IMemoryManagerUniquePtr = std::unique_ptr< IMemoryManager >

template<typename QueueDescriptor >
using	FloatWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >

template<typename QueueDescriptor >
using	Float32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Float32 >

template<typename QueueDescriptor >
using	Uint8Workload = TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 >

template<typename QueueDescriptor >
using	Int32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Signed32 >

template<typename QueueDescriptor >
using	BooleanWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Boolean >

template<typename QueueDescriptor >
using	BaseFloat32ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean >

template<typename QueueDescriptor >
using	BaseUint8ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean >

template<typename QueueDescriptor >
using	BFloat16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32 >

template<typename QueueDescriptor >
using	Float32ToBFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16 >

template<typename QueueDescriptor >
using	Float16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >

template<typename QueueDescriptor >
using	Float32ToFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16 >

template<typename QueueDescriptor >
using	Uint8ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32 >

using	InputQueueDescriptor = MemCopyQueueDescriptor

using	OutputQueueDescriptor = MemCopyQueueDescriptor

using	MergerQueueDescriptor = ConcatQueueDescriptor

using	LogSoftmaxDescriptor = SoftmaxDescriptor
	A LogSoftmaxDescriptor for the LogSoftmaxLayer. More...

using	DepthToSpaceDescriptor = SpaceToDepthDescriptor
	A DepthToSpaceDescriptor for the DepthToSpaceLayer. More...

using	UnidirectionalSequenceLstmDescriptor = LstmDescriptor

using	ConcatDescriptor = OriginsDescriptor

using	MergerDescriptor = OriginsDescriptor
	MergerDescriptor is deprecated, use ConcatDescriptor instead. More...

using	SplitterDescriptor = ViewsDescriptor

using	INetworkPtr = std::unique_ptr< INetwork, void(*)(INetwork *network)>

using	IOptimizedNetworkPtr = std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>

using	CompiledBlobDeleter = std::function< void(const void *)>

using	CompiledBlobPtr = std::unique_ptr< void, CompiledBlobDeleter >

using	NetworkId = int

using	IRuntimePtr = std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>

using	IGpuAccTunedParametersPtr = std::shared_ptr< IGpuAccTunedParameters >
	The following API is replaced by the backend options API. More...

using	MemorySourceFlags = unsigned int

using	BindingPointInfo = std::pair< armnn::LayerBindingId, armnn::TensorInfo >

using	InputTensors = std::vector< std::pair< LayerBindingId, class ConstTensor > >

using	OutputTensors = std::vector< std::pair< LayerBindingId, class Tensor > >

using	IBackendSharedPtr = std::shared_ptr< IBackend >

using	IBackendUniquePtr = std::unique_ptr< IBackend, void(*)(IBackend *backend)>

using	LayerBindingId = int
	Type of identifiers for bindable layers (inputs, outputs). More...

using	ImportedInputId = unsigned int

using	ImportedOutputId = unsigned int

using	DebugCallbackFunction = std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)>
	Define the type of callback for the Debug layer to call. More...

using	HighResolutionClock = std::chrono::high_resolution_clock::time_point
	Define a timer and associated inference ID for recording execution times. More...

using	InferenceTimingPair = std::pair< HighResolutionClock, HighResolutionClock >

using	TensorInfos = std::vector< TensorInfo >

using	WorkloadQueue = std::vector< std::unique_ptr< IWorkload > >

using	Coordinates = std::array< unsigned int, MaxNumOfTensorDimensions >

using	Dimensions = std::array< unsigned int, MaxNumOfTensorDimensions >

using	LayerPriority = unsigned int

using	AdditionalInfoObjectPtr = std::shared_ptr< void >

using	PreCompiledObjectDeleter = std::function< void(const void *)>

using	PreCompiledObjectPtr = std::unique_ptr< void, PreCompiledObjectDeleter >

template<LayerType Type>
using	LayerTypeOf = typename LayerTypeOfImpl< Type >::Type

using	NetworkImplPtr = std::unique_ptr< NetworkImpl, void(*)(NetworkImpl *network)>

using	BackendsMap = std::map< BackendId, std::unique_ptr< class IBackendInternal > >

template<DataType DT>
using	ResolveType = typename ResolveTypeImpl< DT >::Type

using	LoadedNetworks = std::unordered_map< NetworkId, std::unique_ptr< LoadedNetwork > >

using	IReportStructure = arm::pipe::IReportStructure

using	IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

using	ParameterStringifyFunction = std::function< void(const std::string &name, const std::string &value)>

using	FactoryId = ITensorHandleFactory::FactoryId

using	Half = half_float::half

using	CopyAndImportFactoryPairs = std::map< ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId >

using	ACLMemManagerOnDemand = std::shared_ptr< arm_compute::MemoryManagerOnDemand >

using	RefDebugBFloat16Workload = RefDebugWorkload< DataType::BFloat16 >

using	RefDebugFloat16Workload = RefDebugWorkload< DataType::Float16 >

using	RefDebugFloat32Workload = RefDebugWorkload< DataType::Float32 >

using	RefDebugQAsymmU8Workload = RefDebugWorkload< DataType::QAsymmU8 >

using	RefDebugQAsymmS8Workload = RefDebugWorkload< DataType::QAsymmS8 >

using	RefDebugQSymmS16Workload = RefDebugWorkload< DataType::QSymmS16 >

using	RefDebugQSymmS8Workload = RefDebugWorkload< DataType::QSymmS8 >

using	RefDebugSigned32Workload = RefDebugWorkload< DataType::Signed32 >

using	RefDebugSigned64Workload = RefDebugWorkload< DataType::Signed64 >

template<typename DataType = float>
using	RefAdditionWorkload = RefElementwiseWorkload< std::plus< DataType >, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute >

template<typename DataType = float>
using	RefSubtractionWorkload = RefElementwiseWorkload< std::minus< DataType >, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute >

template<typename DataType = float>
using	RefMultiplicationWorkload = RefElementwiseWorkload< std::multiplies< DataType >, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute >

template<typename DataType = float>
using	RefDivisionWorkload = RefElementwiseWorkload< std::divides< DataType >, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute >

template<typename DataType = float>
using	RefMaximumWorkload = RefElementwiseWorkload< armnn::maximum< DataType >, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute >

template<typename DataType = float>
using	RefMinimumWorkload = RefElementwiseWorkload< armnn::minimum< DataType >, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute >

using	RefPermuteBFloat16Workload = RefPermuteWorkload< DataType::BFloat16 >

using	RefPermuteFloat16Workload = RefPermuteWorkload< DataType::Float16 >

using	RefPermuteFloat32Workload = RefPermuteWorkload< DataType::Float32 >

using	RefPermuteQAsymmS8Workload = RefPermuteWorkload< DataType::QAsymmS8 >

using	RefPermuteQAsymm8Workload = RefPermuteWorkload< DataType::QAsymmU8 >

using	RefPermuteQSymm16Workload = RefPermuteWorkload< DataType::QSymmS16 >

using	RefTransposeBFloat16Workload = RefTransposeWorkload< DataType::BFloat16 >

using	RefTransposeFloat16Workload = RefTransposeWorkload< DataType::Float16 >

using	RefTransposeFloat32Workload = RefTransposeWorkload< DataType::Float32 >

using	RefTransposeQAsymmS8Workload = RefTransposeWorkload< DataType::QAsymmS8 >

using	RefTransposeQAsymm8Workload = RefTransposeWorkload< DataType::QAsymmU8 >

using	RefTransposeQSymm16Workload = RefTransposeWorkload< DataType::QSymmS16 >

Enumerations
enum	Compute { Undefined = 0, CpuRef = 1, CpuAcc = 2, GpuAcc = 3 }
	The Compute enum is now deprecated and it is now being replaced by BackendId. More...

enum	CapabilityClass { PaddingRequired = 1, FallbackImportDisabled = 2, CapabilityClassMax = 254 }
	Capability class to calculate in the GetCapabilities function, so that only the capability in scope can be chosen for calculation. More...

enum	EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget }

enum	BoostLogSeverityMapping { trace, debug, info, warning, error, fatal }

enum	Status { Success = 0, Failure = 1 }

enum	DataType { Float16 = 0, Float32 = 1, QAsymmU8 = 2, Signed32 = 3, Boolean = 4, QSymmS16 = 5, QSymmS8 = 6, QAsymmS8 = 7, BFloat16 = 8, Signed64 = 9 }

enum	DataLayout { NCHW = 1, NHWC = 2, NDHWC = 3, NCDHW = 4 }

enum	ProfilingDetailsMethod { Undefined = 0, DetailsWithEvents = 1, DetailsOnly = 2 }
	Define the behaviour of the internal profiler when outputting network details. More...

enum	QosExecPriority { Low = 0, Medium = 1, High = 2 }

enum	ActivationFunction { Sigmoid = 0, TanH = 1, Linear = 2, ReLu = 3, BoundedReLu = 4, SoftReLu = 5, LeakyReLu = 6, Abs = 7, Sqrt = 8, Square = 9, Elu = 10, HardSwish = 11, Gelu = 12 }

enum	ArgMinMaxFunction { Min = 0, Max = 1 }

enum	ComparisonOperation { Equal = 0, Greater = 1, GreaterOrEqual = 2, Less = 3, LessOrEqual = 4, NotEqual = 5 }

enum	LogicalBinaryOperation { LogicalAnd = 0, LogicalOr = 1 }

enum	UnaryOperation { Abs = 0, Exp = 1, Sqrt = 2, Rsqrt = 3, Neg = 4, LogicalNot = 5, Log = 6, Sin = 7, Ceil = 8 }

enum	BinaryOperation { Add = 0, Div = 1, Maximum = 2, Minimum = 3, Mul = 4, Sub = 5, SqDiff = 6, Power = 7 }

enum	PoolingAlgorithm { Max = 0, Average = 1, L2 = 2 }

enum	ReduceOperation { Sum = 0, Max = 1, Mean = 2, Min = 3, Prod = 4 }

enum	ResizeMethod { Bilinear = 0, NearestNeighbor = 1 }

enum	Dimensionality { NotSpecified = 0, Specified = 1, Scalar = 2 }

enum	PaddingMethod { IgnoreValue = 0, Exclude = 1 }
	The padding method modifies the output of pooling layers. More...

enum	PaddingMode { Constant = 0, Reflect = 1, Symmetric = 2 }
	The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect). More...

enum	NormalizationAlgorithmChannel { Across = 0, Within = 1 }

enum	NormalizationAlgorithmMethod { LocalBrightness = 0, LocalContrast = 1 }

enum	OutputShapeRounding { Floor = 0, Ceiling = 1 }

enum	ShapeInferenceMethod { ValidateOnly = 0, InferAndValidate = 1 }
	The ShapeInferenceMethod modifies how the output shapes are treated. More...

enum	MemorySource : uint32_t { Undefined = 0, Malloc = 1, DmaBuf = 2, DmaBufProtected = 4, Gralloc = 8 }
	Define the Memory Source to reduce copies. More...

enum	MemBlockStrategyType { SingleAxisPacking = 0, MultiAxisPacking = 1 }

enum	FusedKernelType { AddMulAdd = 0 }

enum	BackendCapability : uint32_t { NonConstWeights, AsyncExecution }
	BackendCapability class. More...

enum	LayerType { X, Activation, Addition, ArgMinMax, BatchNormalization, BatchToSpaceNd, Comparison, Concat, Constant, ConvertFp16ToFp32, ConvertFp32ToFp16, Convolution2d, Debug, DepthToSpace, DepthwiseConvolution2d, Dequantize, DetectionPostProcess, Division, ElementwiseUnary, FakeQuantization, Fill, Floor, FullyConnected, Gather, Input, InstanceNormalization, L2Normalization, LogicalBinary, LogSoftmax, Lstm, QLstm, Map, Maximum, Mean, MemCopy, MemImport, Merge, Minimum, Multiplication, Normalization, Output, Pad, Permute, Pooling2d, PreCompiled, Prelu, Quantize, QuantizedLstm, Reshape, Rank, Resize, Reduce, Slice, Softmax, SpaceToBatchNd, SpaceToDepth, Splitter, Stack, StandIn, StridedSlice, Subtraction, Switch, Transpose, TransposeConvolution2d, Unmap, Cast, Shape, UnidirectionalSequenceLstm, ChannelShuffle, Convolution3d, Pooling3d, GatherNd, BatchMatMul, ElementwiseBinary, ReverseV2, Tile, Fused, BroadcastTo, FirstLayer = Activation, LastLayer = BroadcastTo }
	When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below. More...

enum	LogSeverity { Trace, Debug, Info, Warning, Error, Fatal }

enum	GraphEvent { LayerAdded, LayerErased }

enum	JsonObjectType { Measurement, Event, ExecObjectDesc }

enum	TuningLevel { None, Rapid, Normal, Exhaustive }

Functions
LayerSupportHandle	GetILayerSupportByBackendId (const armnn::BackendId &backend)
	Convenience function to retrieve the ILayerSupportHandle for a backend. More...

bool	HasCapability (const std::string &name, const BackendCapabilities &capabilities)
	Convenience function to check if a capability exists in a BackendCapabilities struct. More...

bool	HasCapability (const std::string &name, const armnn::BackendId &backend)
	Convenience function to check if a capability exists in a backend. More...

bool	HasCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
	Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...

bool	HasCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
	Convenience function to check if a given capability matches a capability in a backend. More...

bool	HasMatchingCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
	Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...

bool	HasMatchingCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
	Convenience function to check if a given capability matches a capability in a backend. More...

Optional< const BackendOptions::BackendOption >	GetCapability (const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
	Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...

Optional< const BackendOptions::BackendOption >	GetCapability (const std::string &backendCapabilityName, const armnn::BackendId &backend)
	Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...

unsigned int	GetNumberOfCacheFiles (const armnn::BackendId &backend)
	Returns the number of cached files if backend supports caching. More...

constexpr char const *	GetComputeDeviceAsCString (Compute compute)
	Deprecated function that will be removed together with the Compute enum. More...

std::ostream &	operator<< (std::ostream &os, const std::vector< Compute > &compute)
	Deprecated function that will be removed together with the Compute enum. More...

std::ostream &	operator<< (std::ostream &os, const std::set< Compute > &compute)
	Deprecated function that will be removed together with the Compute enum. More...

std::ostream &	operator<< (std::ostream &os, const Compute &compute)
	Deprecated function that will be removed together with the Compute enum. More...

std::ostream &	operator<< (std::ostream &os, const BackendId &id)

template<template< typename... > class TContainer, typename... TContainerTemplateArgs>
std::ostream &	operator<< (std::ostream &os, const TContainer< BackendId, TContainerTemplateArgs... > &ids)

template<typename F >
void	ParseOptions (const std::vector< BackendOptions > &options, BackendId backend, F f)

bool	ParseBooleanBackendOption (const armnn::BackendOptions::Var &value, bool defaultValue)

std::string	ParseStringBackendOption (const armnn::BackendOptions::Var &value, std::string defaultValue)

int	ParseIntBackendOption (const armnn::BackendOptions::Var &value, int defaultValue)

BackendRegistry &	BackendRegistryInstance ()

std::ostream &	operator<< (std::ostream &os, const BackendVersion &backendVersion)

TensorShape	GetUnpaddedTensorStrides (const TensorInfo &tensorInfo)

DataType	GetBiasDataType (DataType inputDataType)

template<typename TensorShapeIt >
OriginsDescriptor	CreateDescriptorForConcatenation (TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
	Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors. More...

template<typename ExceptionType >
void	ConditionalThrow (bool condition, const std::string &message)

template<typename ExceptionType >
void	ConditionalThrow (bool condition)

template<typename ExceptionType , typename ComparedType >
void	ConditionalThrowIfNotEqual (const std::string &message, const ComparedType &leftHandSide, const ComparedType &rightHandSide)
	ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&). More...

IOptimizedNetworkPtr	Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
	Create an optimized version of the network. More...

IOptimizedNetworkPtr	Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
	Create an optimized version of the network. More...

IOptimizedNetworkPtr	Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
	Accept legacy OptimizerOptions. More...

IOptimizedNetworkPtr	Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
	Accept legacy OptimizerOptions. More...

std::string	LevelToString (LogSeverity level)

LogSeverity	StringToLogLevel (std::string level)

void	SetLogFilter (LogSeverity level)

void	SetAllLoggingSinks (bool standardOut, bool debugOut, bool coloured)

constexpr LogSeverity	ConvertLogSeverity (BoostLogSeverityMapping severity)

template<typename Arg , typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags	Combine (Arg sourceA, Arg sourceB)

template<typename Arg , typename ... Args, typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags	Combine (Arg source, Args... rest)

bool	CheckFlag (MemorySourceFlags flags, MemorySource source)

template<typename T , class... Args>
Optional< T >	MakeOptional (Args &&... args)
	Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object. More...

const char *	GetLayerTypeAsCString (LayerType type)

constexpr char const *	GetStatusAsCString (Status status)

constexpr char const *	GetActivationFunctionAsCString (ActivationFunction activation)

constexpr char const *	GetArgMinMaxFunctionAsCString (ArgMinMaxFunction function)

constexpr char const *	GetComparisonOperationAsCString (ComparisonOperation operation)

constexpr char const *	GetBinaryOperationAsCString (BinaryOperation operation)

constexpr char const *	GetUnaryOperationAsCString (UnaryOperation operation)

constexpr char const *	GetLogicalBinaryOperationAsCString (LogicalBinaryOperation operation)

constexpr char const *	GetFusedTypeAsCString (FusedKernelType type)

constexpr char const *	GetPoolingAlgorithmAsCString (PoolingAlgorithm pooling)

constexpr char const *	GetOutputShapeRoundingAsCString (OutputShapeRounding rounding)

constexpr char const *	GetPaddingMethodAsCString (PaddingMethod method)

constexpr char const *	GetPaddingModeAsCString (PaddingMode mode)

constexpr char const *	GetReduceOperationAsCString (ReduceOperation reduce_operation)

constexpr unsigned int	GetDataTypeSize (DataType dataType)

template<unsigned N>
constexpr bool	StrEqual (const char *strA, const char(&strB)[N])

constexpr armnn::Compute	ParseComputeDevice (const char *str)
	Deprecated function that will be removed together with the Compute enum. More...

constexpr const char *	GetDataTypeName (DataType dataType)

constexpr const char *	GetDataLayoutName (DataLayout dataLayout)

constexpr const char *	GetNormalizationAlgorithmChannelAsCString (NormalizationAlgorithmChannel channel)

constexpr const char *	GetNormalizationAlgorithmMethodAsCString (NormalizationAlgorithmMethod method)

constexpr const char *	GetResizeMethodAsCString (ResizeMethod method)

constexpr const char *	GetMemBlockStrategyTypeName (MemBlockStrategyType memBlockStrategyType)

template<typename T >
constexpr bool	IsQuantizedType ()

constexpr bool	IsQuantized8BitType (DataType dataType)

constexpr bool	IsQuantizedType (DataType dataType)

std::ostream &	operator<< (std::ostream &os, Status stat)

std::ostream &	operator<< (std::ostream &os, const armnn::TensorShape &shape)

template<typename QuantizedType >
QuantizedType	Quantize (float value, float scale, int32_t offset)
	Quantize a floating point data type into an 8-bit data type. More...

template<typename QuantizedType >
float	Dequantize (QuantizedType value, float scale, int32_t offset)
	Dequantize an 8-bit data type into a floating point data type. More...

void	VerifyTensorInfoDataType (const armnn::TensorInfo &info, armnn::DataType dataType)

template<typename ... Ts>
void	IgnoreUnused (Ts &&...)

template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest >	numeric_cast (Source source)

template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest >	numeric_cast (Source source)

template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_floating_point< Dest >::value, Dest >	numeric_cast (Source source)

template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest >	numeric_cast (Source source)

template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_floating_point< Dest >::value, Dest >	numeric_cast (Source source)

template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Dest >::value &&std::is_integral< Dest >::value &&std::is_unsigned< Source >::value, Dest >	numeric_cast (Source sValue)

template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Dest >::value &&std::is_unsigned< Source >::value, Dest >	numeric_cast (Source sValue)

template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_signed< Source >::value &&std::is_integral< Source >::value, Dest >	numeric_cast (Source sValue)

template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_floating_point< Source >::value, Dest >	numeric_cast (Source sValue)

template<typename DestType , typename SourceType >
DestType	PolymorphicDowncast (SourceType *value)
	Polymorphic downcast for built-in pointers only. More...

template<typename DestType , typename SourceType >
auto	PolymorphicPointerDowncast (const SourceType &value)
	Polymorphic downcast for shared pointers and built-in pointers. More...

std::chrono::high_resolution_clock::time_point	GetTimeNow ()

std::chrono::duration< double, std::milli >	GetTimeDuration (std::chrono::high_resolution_clock::time_point start_time)

template<typename Function , typename Iterator >
constexpr TransformIterator< Function, Iterator >	MakeTransformIterator (Iterator i, Function f)

void	ConfigureLogging (bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
	Configures the logging behaviour of the ARMNN library. More...

bool	NeonDetected ()

const std::string	GetVersion ()

float	roundf (float value)

void	swap (OriginsDescriptor &first, OriginsDescriptor &second)

void	swap (ViewsDescriptor &first, ViewsDescriptor &second)

uint32_t	GetNumInputs (bool biasEnabled)

void	AssertNumberOfInputSlots (Layer &layer)

template<typename T >
constexpr LayerType	LayerEnumOf (const T *=nullptr)

template<>
constexpr LayerType	LayerEnumOf (const ActivationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const AdditionLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ArgMinMaxLayer *)

template<>
constexpr LayerType	LayerEnumOf (const BatchMatMulLayer *)

template<>
constexpr LayerType	LayerEnumOf (const BatchNormalizationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const BatchToSpaceNdLayer *)

template<>
constexpr LayerType	LayerEnumOf (const BroadcastToLayer *)

template<>
constexpr LayerType	LayerEnumOf (const CastLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ChannelShuffleLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ComparisonLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ConcatLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ConstantLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ConvertFp16ToFp32Layer *)

template<>
constexpr LayerType	LayerEnumOf (const ConvertFp32ToFp16Layer *)

template<>
constexpr LayerType	LayerEnumOf (const Convolution2dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const Convolution3dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DebugLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DepthToSpaceLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DepthwiseConvolution2dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DequantizeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DetectionPostProcessLayer *)

template<>
constexpr LayerType	LayerEnumOf (const DivisionLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ElementwiseBinaryLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ElementwiseUnaryLayer *)

template<>
constexpr LayerType	LayerEnumOf (const FakeQuantizationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const FillLayer *)

template<>
constexpr LayerType	LayerEnumOf (const FloorLayer *)

template<>
constexpr LayerType	LayerEnumOf (const FullyConnectedLayer *)

template<>
constexpr LayerType	LayerEnumOf (const FusedLayer *)

template<>
constexpr LayerType	LayerEnumOf (const GatherLayer *)

template<>
constexpr LayerType	LayerEnumOf (const GatherNdLayer *)

template<>
constexpr LayerType	LayerEnumOf (const InputLayer *)

template<>
constexpr LayerType	LayerEnumOf (const InstanceNormalizationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const L2NormalizationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const LogicalBinaryLayer *)

template<>
constexpr LayerType	LayerEnumOf (const LogSoftmaxLayer *)

template<>
constexpr LayerType	LayerEnumOf (const LstmLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MapLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MaximumLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MeanLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MemCopyLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MemImportLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MergeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MinimumLayer *)

template<>
constexpr LayerType	LayerEnumOf (const MultiplicationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const NormalizationLayer *)

template<>
constexpr LayerType	LayerEnumOf (const OutputLayer *)

template<>
constexpr LayerType	LayerEnumOf (const PadLayer *)

template<>
constexpr LayerType	LayerEnumOf (const PermuteLayer *)

template<>
constexpr LayerType	LayerEnumOf (const Pooling2dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const Pooling3dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const PreCompiledLayer *)

template<>
constexpr LayerType	LayerEnumOf (const PreluLayer *)

template<>
constexpr LayerType	LayerEnumOf (const QuantizeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const QLstmLayer *)

template<>
constexpr LayerType	LayerEnumOf (const QuantizedLstmLayer *)

template<>
constexpr LayerType	LayerEnumOf (const RankLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ReduceLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ReshapeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ResizeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const ReverseV2Layer *)

template<>
constexpr LayerType	LayerEnumOf (const ShapeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SliceLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SoftmaxLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SpaceToBatchNdLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SpaceToDepthLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SplitterLayer *)

template<>
constexpr LayerType	LayerEnumOf (const StackLayer *)

template<>
constexpr LayerType	LayerEnumOf (const StandInLayer *)

template<>
constexpr LayerType	LayerEnumOf (const StridedSliceLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SubtractionLayer *)

template<>
constexpr LayerType	LayerEnumOf (const SwitchLayer *)

template<>
constexpr LayerType	LayerEnumOf (const TileLayer *)

template<>
constexpr LayerType	LayerEnumOf (const TransposeLayer *)

template<>
constexpr LayerType	LayerEnumOf (const TransposeConvolution2dLayer *)

template<>
constexpr LayerType	LayerEnumOf (const UnidirectionalSequenceLstmLayer *)

template<>
constexpr LayerType	LayerEnumOf (const UnmapLayer *)

template<typename T , typename V >
void	SetValueChecked (Optional< T & > optionalRef, V &&val)

template<typename Float16Func , typename Float32Func , typename Uint8Func , typename Int32Func , typename BooleanFunc , typename ... Params>
bool	IsSupportedForDataTypeGeneric (Optional< std::string & > reasonIfUnsupported, DataType dataType, Float16Func float16FuncPtr, Float32Func float32FuncPtr, Uint8Func uint8FuncPtr, Int32Func int32FuncPtr, BooleanFunc booleanFuncPtr, Params &&... params)

template<typename ... Params>
bool	TrueFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseFuncU8 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseFuncI32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseInputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseInputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseOutputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

template<typename ... Params>
bool	FalseOutputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)

void	ValidateSourcesMatchOptimizedNetwork (std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
	This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization. More...

void	CopyToOutputTensor (const Tensor &outputTensor, ITensorHandle *outputTensorHandle)

const armnn::ConstTensor	GetInputTensor (const LayerBindingId layerId, const InputTensors &inputTensors)

const armnn::Tensor	GetOutputTensor (const LayerBindingId layerId, const OutputTensors &outputTensors)

template<LogSeverity Level>
void	SetLoggingSinks (bool standardOut, bool debugOut, bool coloured)

void	ReportError (const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)

void	ReportWarning (const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)

OptimizationResult	ReturnWithError (OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)

bool	CheckScaleSetOnQuantizedType (Layer *layer, Optional< std::vector< std::string > & > errMessages)

OptimizationResult	AttemptBackendAssignment (BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > errMessages)

std::vector< DataType >	GetLayerInOutDatatype (const Layer *layer)

bool	CheckFp16Support (BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)

void	AssignBackendsIConnectable (OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)

OptimizationResult	AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > & > errMessages)

OptimizationResult	AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView::IConnectableLayerIterator &firstLayer, SubgraphView::IConnectableLayerIterator &lastLayer, Optional< std::vector< std::string > & > errMessages)

OptimizationResult	AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > & > errMessages)

BackendsMap	CreateSupportedBackends (TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)

OptimizationResult	ApplyBackendOptimizations (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)

bool	RequiresCopy (ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)

ITensorHandleFactory::FactoryId	CalculateSlotOptionForInput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)

ITensorHandleFactory::FactoryId	CalculateSlotOptionForOutput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)

ITensorHandleFactory::FactoryId	CalculateSlotOption (BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool exportEnabled)

EdgeStrategy	CalculateEdgeStrategy (BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)

OptimizationResult	SelectTensorHandleStrategy (Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)

std::vector< ConvertFp16ToFp32Layer * >	InsertConvertFp16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)

std::vector< ConvertFp32ToFp16Layer * >	InsertConvertFp32ToFp16LayersAfter (Graph &graph, Layer &layer)

std::vector< DebugLayer * >	InsertDebugLayerAfter (Graph &graph, Layer &layer, bool toFile)

bool	RevertConstantWeightsToFP32 (Layer *layer)

template<typename T >
void	Append (Optimizer::Optimizations &optimizations, T &&optimization)

template<typename Front , typename... Others>
void	Append (Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)

template<typename... Args>
Optimizer::Optimizations	MakeOptimizations (Args &&... args)

Measurement	FindMeasurement (const std::string &name, const Event *event)

std::vector< Measurement >	FindKernelMeasurements (const Event *event)

const Event *	GetEventPtr (const Event *ptr)

const Event *	GetEventPtr (const std::unique_ptr< Event > &ptr)

int	CalcLevel (const Event *eventPtr)

void	ConfigureDetailsObject (JsonChildObject &detailsObject, std::string layerDetailsStr)

void	ExtractJsonObjects (unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event * >> descendantsMap)

template<typename DescriptorType >
void	ProfilingUpdateDescriptions (const std::string &name, const DescriptorType &desc, const WorkloadInfo &infos, const arm::pipe::ProfilingGuid guid)

template<typename Delegate >
void	ForEachLayerInput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

template<typename Delegate >
void	ForEachLayerOutput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

void	AssignSplitId (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)

bool	IsReadyForSplitAssignment (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)

bool	IsLayerSupported (const armnn::Layer *layer)

bool	IsLayerSupported (const armnn::Layer &layer)

bool	IsLayerOptimizable (const armnn::Layer *layer)

bool	IsLayerOptimizable (const armnn::Layer &layer)

constexpr const char *	MockTensorHandleFactoryId ()

Graph &	GetGraphForTesting (IOptimizedNetwork *optNet)

ModelOptions &	GetModelOptionsForTesting (IOptimizedNetwork *optNet)

arm::pipe::IProfilingService &	GetProfilingService (armnn::RuntimeImpl *runtime)

std::ostream &	operator<< (std::ostream &os, const BFloat16 &b)

template<typename LayerType >
LayerType *	FuseLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)

template<typename LayerType >
LayerType *	FuseAdditionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseSubtractionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseDivisionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseMultiplicationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseElementwiseBinaryLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, BinaryOperation operation, std::string name)

template<typename LayerType >
LayerType *	FuseBatchNormalizationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseDepthwiseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
LayerType *	FuseFullyConnectedLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)

template<typename LayerType >
std::vector< IConnectableLayer * >	ChainReduceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)

template<typename LayerType >
void	ReplaceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer * > &layers)

template<typename LayerType >
void	ReplaceMultipleLayers (OptimizationViews &optimizationViews, std::vector< IConnectableLayer * > &originalLayers, LayerType *baseLayer, const std::vector< SlotList > inputLayersSlotLists, const std::vector< SlotList > outputLayersSlotLists)

TuningLevel	ParseTuningLevel (const BackendOptions::Var &value, TuningLevel defaultValue)

void	ConfigureTuner (arm_compute::CLTuner &tuner, TuningLevel level)

arm_compute::NormalizationLayerInfo	CreateAclNormalizationLayerInfoForL2Normalization (const armnn::TensorInfo &tensorInfo, armnn::DataLayout dataLayout)

arm_compute::ActivationLayerInfo::ActivationFunction	ConvertActivationFunctionToAclActivationFunction (ActivationFunction armnnFunction)

arm_compute::ActivationLayerInfo	ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor &actDesc)

arm_compute::ActivationLayerInfo	ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor *activationDescPtr)

arm_compute::ActivationLayerInfo	ConvertAdditionalInfoToAclActivationLayerInfo (const QueueDescriptor &queueDescriptor)

arm_compute::ActivationLayerInfo	ConvertLstmActivationFuncToAclLayerInfo (uint32_t activationFunction)

arm_compute::ComparisonOperation	ConvertComparisonOperationToAcl (const ComparisonDescriptor &descriptor)

arm_compute::PoolingType	ConvertPoolingAlgorithmToAclPoolingType (PoolingAlgorithm poolingAlgorithm)

arm_compute::DimensionRoundingType	ConvertOutputShapeRoundingToAclDimensionRoundingType (OutputShapeRounding rounding)

arm_compute::NormType	ConvertNormalizationAlgorithmChannelToAclNormType (NormalizationAlgorithmChannel channelType)

arm_compute::FullyConnectedLayerInfo	ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)

arm_compute::FullyConnectedLayerInfo	ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, arm_compute::ActivationLayerInfo activationLayerInfo)

arm_compute::InterpolationPolicy	ConvertResizeMethodToAclInterpolationPolicy (ResizeMethod resizeMethod)

template<typename T >
T	ComputeSoftmaxAclAxis (const SoftmaxDescriptor &softmaxDesc, const armnn::TensorInfo &tensor)

std::set< unsigned int >	ComputeSplitAxis (const armnn::SplitterDescriptor &desc, const TensorShape &input)

int	ComputeAclAxis (const int &armnnAxis, const armnn::TensorInfo &tensor)
	Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank) More...

arm_compute::Conv3dInfo	ComputeConv3DInfo (const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
	Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor. More...

arm_compute::Conv3dInfo	ComputeConv3DInfo (const armnn::Convolution3dQueueDescriptor queueDescriptor, bool isFastMathEnabled)

arm_compute::PaddingMode	ConvertPaddingModeToAcl (const PaddingMode &paddingMode)

arm_compute::ReductionOperation	ConvertReductionOperationToAcl (const ReduceDescriptor &descriptor)

const TensorInfo	ComputeReductionTensorShape (const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
	Function to compute the output tensor shape based on the axes and if keepDims is set. More...

armnn::Optional< armnn::DataType >	GetBiasTypeFromWeightsType (armnn::Optional< armnn::DataType > weightsType)

template<typename F >
bool	CheckSupportRule (F rule, Optional< std::string & > reasonIfUnsupported, const char *reason)

template<typename T >
bool	AllTypesAreEqualImpl (T)

template<typename T , typename... Rest>
bool	AllTypesAreEqualImpl (T t1, T t2, Rest... rest)

std::unique_ptr< IMemoryOptimizerStrategy >	GetMemoryOptimizerStrategy (const std::string &strategyName)

const std::vector< std::string >	GetMemoryOptimizerStrategyNames ()

bool	IsNCHW (armnn::Layer &layer)

void	ReportUntouchedLayers (OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)

template<typename LayerType >
LayerType *	FoldPadLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, PadLayer *padLayer)

bool	ConnectedToLayerWithNCHW (Layer *baseLayer)
	Checks if the Layer is connected to any Layer that has an NCHW layout. More...

bool	ConnectedToLayerType (Layer *baseLayer, LayerType layerType, unsigned int dimSize=0)
	Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType. More...

void	RemoveReshapeLayer (ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)

template<typename LayerType >
LayerType *	FoldPadIntoAveragePool2d (OptimizationViews &optimizationViews, Pooling2dLayer *baseLayer, Pooling2dDescriptor &poolDescriptor, PadLayer *padLayer)

bool	IsSequenceLayerType (Layer &layer, LayerType type)

bool	IsSequenceLayerType (Layer &layer, BinaryOperation type)

template<typename TYPE >
bool	IsLayerSequence (Layer &currentLayer, TYPE first, TYPE second, TYPE third, Layer *layerList[4], bool handleValidActivates, const std::vector< ActivationFunction > &validActivates)

armnn::ConstTensor	PermuteTensor (const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)

void	ReshapeWeightsForAcl (TensorInfo &weightInfo, DataLayout dataLayout)

template<typename DataType >
ConstTensor	ReorderWeightChannelsForAcl (const ConstTensor &weightHandle, DataLayout dataLayout, void *permuteBuffer)

TensorInfo	ConvertWeightTensorInfoFromArmnnToAcl (const TensorInfo &weightInfo, DataLayout dataLayout)

std::tuple< ConstTensor, unsigned int >	Convert1HWOTensorToAcl (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
	Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,IM]. This function converts a ConstCpuTensorHandle from [1,H,W,IM] to [1,IM,H,W] (if NCHW) or keeps it at [1,H,W,IM] (if NHWC) as required by the compute library. More...

std::tuple< TensorInfo, unsigned int >	Convert1HWOTensorInfoToAcl (const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
	Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,IM]. This function converts a TensorInfo from [1,H,W,IM] to [1,IM,H,W] (if NCHW) or keeps it at [1,H,W,IM] (if NHWC) as required by the compute library. Returns a tuple of converted weights tensor info and depth multiplier. More...

std::tuple< ConstTensor, unsigned int >	Convert1HWOtoMIHW (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
	Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]. More...

armnn::ConstTensor	ConvertWeightTensorFromArmnnToAcl (const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)

int32_t	ConvertMaskToACLFormat (int32_t mask, int32_t numDim)

std::map< std::string, unsigned int >	CalculateGatherNdKeyIndices (TensorInfo inputInfo0, TensorInfo inputInfo1)
	Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) More...

armnn::PermutationVector	GeneratePermutationVectorOnLastTwoDimensions (unsigned int rank)
	Generates a permutation vector of size rank that permutes the two rightmost dimensions. More...

template<typename CopyFunc >
void	CopyTensorContentsGeneric (const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)

template<typename SrcTensorHandleType , typename DstTensorHandleType , typename DescriptorType >
void	GatherTensorHandlePairs (const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType , DstTensorHandleType >> &tensorHandlePairs)

constexpr const char *	ClBackendId ()

flatbuffers::Offset< ClContext >	CreateClContext (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)

flatbuffers::Offset< ClContext >	CreateClContextDirect (flatbuffers::FlatBufferBuilder &_fbb, const std::vector< flatbuffers::Offset< armnn::Program >> *programs=nullptr)

flatbuffers::Offset< Program >	CreateProgram (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)

flatbuffers::Offset< Program >	CreateProgramDirect (flatbuffers::FlatBufferBuilder &_fbb, const char *name=nullptr, const std::vector< uint8_t > *binary=nullptr)

const armnn::ClContext *	GetClContext (const void *buf)

const armnn::ClContext *	GetSizePrefixedClContext (const void *buf)

const char *	ClContextIdentifier ()

bool	ClContextBufferHasIdentifier (const void *buf)

bool	VerifyClContextBuffer (flatbuffers::Verifier &verifier)

bool	VerifySizePrefixedClContextBuffer (flatbuffers::Verifier &verifier)

const char *	ClContextExtension ()

void	FinishClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)

void	FinishSizePrefixedClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)

constexpr const char *	ClImportTensorHandleFactoryId ()

constexpr const char *	ClTensorHandleFactoryId ()

arm_compute::Status	ClAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)

arm_compute::Status	ClAdditionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)

arm_compute::Status	ClBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)

arm_compute::Status	ClCastValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)

arm_compute::Status	ClComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)

arm_compute::Status	ClConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)

arm_compute::Status	ClConstantWorkloadValidate (const TensorInfo &output)

arm_compute::Status	ClConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)

arm_compute::Status	ClDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClFloorWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)

arm_compute::Status	ClGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)

arm_compute::Status	ClInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)

arm_compute::Status	ClL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)

arm_compute::Status	ClLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	ClLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	ClLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)

arm_compute::Status	ClLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

arm_compute::Status	ClMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	ClMeanValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)

arm_compute::Status	ClMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	ClMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)

arm_compute::Status	ClPadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)

arm_compute::Status	ClPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)

arm_compute::Status	ClPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)

arm_compute::Status	ClPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)

arm_compute::Status	ClPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)

arm_compute::Status	ClQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

arm_compute::Status	ClQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo)

arm_compute::Status	ClQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)

arm_compute::Status	ClReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)

arm_compute::Status	ClReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)

arm_compute::Status	ClRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)

arm_compute::Status	ClSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)

arm_compute::Status	ClSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)

arm_compute::Status	ClSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)

arm_compute::Status	ClSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)

arm_compute::Status	ClSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	ClStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)

arm_compute::Status	ClStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)

arm_compute::Status	ClSubtractionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	ClTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)

arm_compute::Status	ClTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

arm_compute::Status	ClTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)

arm_compute::Status	ClUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

std::string	GetConvolutionMethodString (arm_compute::ConvolutionMethod &convolutionMethod)

template<typename T >
void	CopyArmComputeClTensorData (arm_compute::CLTensor &dstTensor, const T *srcData)

auto	SetClStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)

auto	SetClSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)

void	InitializeArmComputeClTensorData (arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)

RuntimeException	WrapClError (const cl::Error &clError, const CheckLocation &location)

void	RunClFunction (arm_compute::IFunction &function, const CheckLocation &location)

template<typename DataType , typename PayloadType >
DataType *	GetOutputTensorData (unsigned int idx, const PayloadType &data)

template<typename T >
void	DeleteAsType (const void *const blob)

SubgraphView::InputSlots	CreateInputsFrom (Layer *layer)

SubgraphView::OutputSlots	CreateOutputsFrom (Layer *layer)

SubgraphView::SubgraphViewPtr	CreateSubgraphViewFrom (SubgraphView::InputSlots &&inputs, SubgraphView::OutputSlots &&outputs, SubgraphView::Layers &&layers)

constexpr const char *	GpuFsaBackendId ()

template<typename ... Args>
bool	IsGpuFsaBackendSupported (Optional< std::string & > reasonIfUnsupported, Args... args)

constexpr const char *	GpuFsaTensorHandleFactoryId ()

template<DataType ArmnnType>
bool	IsDataType (const WorkloadInfo &info)

arm_compute::Status	GpuFsaActivationValidate (const TensorInfo &input, const ActivationDescriptor &descriptor)

void	GpuFsaActivationCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ActivationDescriptor &descriptor)

arm_compute::Status	GpuFsaBatchMatMulValidate (const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)

void	GpuFsaBatchMatMulCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)

arm_compute::Status	GpuFsaCastValidate (const TensorInfo &input, const TensorInfo &output)

void	GpuFsaCastCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	GpuFsaConvolution2dValidate (const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

void	GpuFsaConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

arm_compute::Status	GpuFsaDepthwiseConvolution2dValidate (const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

void	GpuFsaDepthwiseConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

arm_compute::Status	GpuFsaElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)

void	GpuFsaElementwiseBinaryCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)

arm_compute::Status	GpuFsaPooling2dValidate (const TensorInfo &input, const Pooling2dDescriptor &descriptor)

void	GpuFsaPooling2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Pooling2dDescriptor &descriptor)

arm_compute::Status	GpuFsaReshapeValidate (const TensorInfo &input, const ReshapeDescriptor &descriptor)

void	GpuFsaReshapeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ReshapeDescriptor &descriptor)

arm_compute::Status	GpuFsaResizeValidate (const TensorInfo &input, const ResizeDescriptor &descriptor)

void	GpuFsaResizeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ResizeDescriptor &descriptor)

arm_compute::Status	GpuFsaSoftmaxValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)

void	GpuFsaSoftmaxCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)

arm_compute::Status	GpuFsaConstantWorkloadValidate (const TensorInfo &output)

bool	GpuFsaPreCompiledWorkloadValidate (std::string *reasonIfUnsupported)

constexpr const char *	NeonBackendId ()

bool	CollapseLeadingUnitDimensions (const TensorInfo &in, TensorInfo &out)

template<typename SlotListType >
void	BuildAddMulAddSlotLists (bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)

void	GetFusedName (Layer *layerList[4], std::string &fusedName)

template<typename Type >
bool	BuildAddMulAddTensorInfoLists (Type layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor &activationDescriptor, bool &fuseReLu)

bool	IsLayerTypeSupported (const LayerType &type, const std::vector< TensorInfo > &infos, const BaseDescriptor &descriptor, const Optional< LstmInputParamsInfo > &lstmParamsInfo, const Optional< QuantizedLstmInputParamsInfo > &quantizedLstmParamsInfo, Optional< std::string & > reasonIfUnsupported, const NeonLayerSupport &support)

constexpr const char *	NeonTensorHandleFactoryId ()

arm_compute::Status	NeonAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)

arm_compute::Status	NeonAdditionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)

arm_compute::Status	NeonBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)

arm_compute::Status	NeonCastValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)

arm_compute::Status	NeonComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)

arm_compute::Status	NeonConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)

arm_compute::Status	NeonConstantWorkloadValidate (const TensorInfo &output)

arm_compute::Status	NeonConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)

arm_compute::Status	NeonDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::DetectionPostProcessLayerInfo	MakeInfo (const DetectionPostProcessDescriptor &descriptor)

arm_compute::Status	NeonDetectionPostProcessValidate (const TensorInfo &boxEncodings, const TensorInfo &scores, const TensorInfo &anchors, const TensorInfo &detectionBoxes, const TensorInfo &detectionClasses, const TensorInfo &detectionScores, const TensorInfo &numDetections, const DetectionPostProcessDescriptor &descriptor)

arm_compute::Status	NeonDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonElementwiseBinaryWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonFusedWorkloadValidate (const std::vector< std::reference_wrapper< TensorInfo >> &inputInfos, const std::vector< std::reference_wrapper< TensorInfo >> &outputInfos, const FusedDescriptor &fusedDescriptor, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)

arm_compute::Status	NeonGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)

arm_compute::Status	NeonInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)

arm_compute::Status	NeonL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)

arm_compute::Status	NeonLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	NeonLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	NeonLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)

arm_compute::Status	NeonLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

arm_compute::Status	NeonMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)

arm_compute::Status	NeonMeanWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)

arm_compute::Status	NeonMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
	Validate function for validating the inputs and output. More...

arm_compute::Status	NeonMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)

arm_compute::Status	NeonPadWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)

arm_compute::Status	NeonPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)

arm_compute::Status	NeonPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)

arm_compute::Status	NeonPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)

arm_compute::Status	NeonPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)

arm_compute::Status	NeonQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

arm_compute::Status	NeonQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const QuantizedLstmInputParamsInfo &paramsInfo)

arm_compute::Status	NeonQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)

arm_compute::Status	NeonReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)

arm_compute::Status	NeonReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)

arm_compute::Status	NeonRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)

arm_compute::Status	NeonSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)

arm_compute::Status	NeonSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)

arm_compute::Status	NeonSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)

arm_compute::Status	NeonSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)

arm_compute::Status	NeonSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)

arm_compute::Status	NeonStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)

arm_compute::Status	NeonStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)

arm_compute::Status	NeonSubtractionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)

arm_compute::Status	NeonTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)

arm_compute::Status	NeonTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)

arm_compute::Status	NeonTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)

arm_compute::Status	NeonUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

arm_compute::Status	NeonUnidirectionalSequenceLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)

template<typename T >
void	CopyArmComputeTensorData (arm_compute::Tensor &dstTensor, const T *srcData)

void	InitializeArmComputeTensorData (arm_compute::Tensor &tensor, TensorInfo tensorInfo, const ITensorHandle *handle)

void	InitializeArmComputeTensorData (arm_compute::Tensor &tensor, const ConstTensorHandle *handle)

auto	SetNeonStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)

auto	SetNeonSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)

constexpr const char *	RefBackendId ()

constexpr const char *	RefTensorHandleFactoryId ()

bool	IsSigned64 (const WorkloadInfo &info)

bool	IsSigned32 (const WorkloadInfo &info)

bool	IsBFloat16 (const WorkloadInfo &info)

bool	IsFloat16 (const WorkloadInfo &info)

bool	IsQSymmS16 (const WorkloadInfo &info)

bool	IsQSymmS8 (const WorkloadInfo &info)

bool	IsQAsymmS8 (const WorkloadInfo &info)

bool	IsQAsymmU8 (const WorkloadInfo &info)

template<typename QueueDescriptorType >
constexpr bool	IsOperationQueueDescriptor (const QueueDescriptorType &)

template<>
constexpr bool	IsOperationQueueDescriptor (const MemCopyQueueDescriptor &)

template<>
constexpr bool	IsOperationQueueDescriptor (const ConstantQueueDescriptor &)

template<>
constexpr bool	IsOperationQueueDescriptor (const PermuteQueueDescriptor &)

float	Activation (float in, ActivationFunction function, float a, float b)

void	Activation (Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)

template<typename OUT >
void	ArgMinMax (Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)

template void	ArgMinMax (Decoder< float > &in, int32_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)

template void	ArgMinMax (Decoder< float > &in, int64_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)

void	BatchNormImpl (const BatchNormalizationQueueDescriptor &data, Decoder< float > &meanDecoder, Decoder< float > &varianceDecoder, Decoder< float > &betaDecoder, Decoder< float > &gammaDecoder, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)

unsigned int	Offset (const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)

void	BatchToSpaceNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const BatchToSpaceNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)

void	Concatenate (const ConcatQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)

void	Convolve3d (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int paddingFront, unsigned int xStride, unsigned int yStride, unsigned int zStride, unsigned int xDilation, unsigned int yDilation, unsigned int zDilation)

void	Convolve (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)

template<typename T >
void	PrintOutput (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, std::ostream &os)

template<typename T >
void	Debug (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< BFloat16 > (const TensorInfo &inputInfo, const BFloat16 *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< Half > (const TensorInfo &inputInfo, const Half *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< float > (const TensorInfo &inputInfo, const float *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< uint8_t > (const TensorInfo &inputInfo, const uint8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< int8_t > (const TensorInfo &inputInfo, const int8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< int16_t > (const TensorInfo &inputInfo, const int16_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< int32_t > (const TensorInfo &inputInfo, const int32_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template void	Debug< int64_t > (const TensorInfo &inputInfo, const int64_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)

template<typename T >
std::unique_ptr< Decoder< T > >	MakeDecoder (const TensorInfo &info, const void *data=nullptr)

template<>
std::unique_ptr< Decoder< float > >	MakeDecoder (const TensorInfo &info, const void *data)

void	DepthToSpace (const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)

void	Dequantize (Decoder< float > &inputDecoder, Encoder< float > &outputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo)

std::vector< unsigned int >	GenerateRangeK (unsigned int k)

void	TopKSort (unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)

float	IntersectionOverUnion (const float *boxI, const float *boxJ)

std::vector< unsigned int >	NonMaxSuppression (unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)

void	AllocateOutputData (unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)

void	DetectionPostProcess (const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &anchorsInfo, const TensorInfo &detectionBoxesInfo, const TensorInfo &detectionClassesInfo, const TensorInfo &detectionScoresInfo, const TensorInfo &numDetectionsInfo, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)

template<typename T >
std::unique_ptr< Encoder< T > >	MakeEncoder (const TensorInfo &info, void *data=nullptr)

template<>
std::unique_ptr< Encoder< float > >	MakeEncoder (const TensorInfo &info, void *data)

void	Fill (Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
	Creates a tensor and fills it with a scalar value. More...

void	FullyConnected (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *rBiasDecoder, bool biasEnabled, unsigned int K, bool transposeWeights)
	Performs a matrix multiplication and optionally adds a bias. More...

void	Gather (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis_int)

void	InstanceNorm (const InstanceNormalizationQueueDescriptor &data, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)

void	LogSoftmax (Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)

void	LstmImpl (const LstmDescriptor &descriptor, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const TensorShape &inputToOutputWeightsShape, const TensorShape &recurrentToOutputWeightsShape, std::unique_ptr< Decoder< float >> &inputData, std::unique_ptr< Decoder< float >> &outputStateIn, std::unique_ptr< Decoder< float >> &cellStateIn, std::unique_ptr< Encoder< float >> &outputStateOut, std::unique_ptr< Encoder< float >> &cellStateOut, std::unique_ptr< Encoder< float >> &output, std::unique_ptr< Decoder< float >> &cellStateOutDecoder, std::unique_ptr< Decoder< float >> &outputDecoder, std::unique_ptr< Decoder< float >> &inputToInputWeightsTensor, std::unique_ptr< Decoder< float >> &inputToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &inputToCellWeightsTensor, std::unique_ptr< Decoder< float >> &inputToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToInputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToCellWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToInputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &cellToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &inputGateBiasTensor, std::unique_ptr< Decoder< float >> &forgetGateBiasTensor, std::unique_ptr< Decoder< float >> &cellBiasTensor, std::unique_ptr< Decoder< float >> &outputGateBiasTensor, std::unique_ptr< Decoder< float >> &projectionWeightsTensor, std::unique_ptr< Decoder< float >> &projectionBiasTensor, std::unique_ptr< Decoder< float >> &inputLayerNormWeights, std::unique_ptr< Decoder< float >> &forgetLayerNormWeights, std::unique_ptr< Decoder< float >> &cellLayerNormWeights, std::unique_ptr< Decoder< float >> &outputLayerNormWeights, std::unique_ptr< Encoder< float >> &inputGateScratch, std::unique_ptr< Encoder< float >> &cellScratch, 
std::unique_ptr< Encoder< float >> &forgetGateScratch, std::unique_ptr< Encoder< float >> &outputGateScratch, std::unique_ptr< Decoder< float >> &inputGateScratchDecoder, std::unique_ptr< Decoder< float >> &cellScratchDecoder, std::unique_ptr< Decoder< float >> &forgetGateScratchDecoder, std::unique_ptr< Decoder< float >> &outputGateScratchDecoder, float layerNormEpsilon)

void	MirrorPad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)

void	Pad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)

void	Pooling2d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
	Computes the Pooling2d operation. More...

void	Pooling3d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
	Computes the Pooling3d operation. More...

void	PreluImpl (const TensorInfo &inputInfo, const TensorInfo &alphaInfo, const TensorInfo &outputInfo, Decoder< float > &inputData, Decoder< float > &alphaData, Encoder< float > &outputData)

bool	NextIndex (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)

unsigned int	ReducedOutputOffset (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)

void	Reduce (const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)

template<typename DataType >
void	ExecuteFunction (std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs, BinaryOperation operation)

void	FakeQuantization (const float *inputData, float *outputData, uint32_t numElements, float min, float max)

unsigned int	GetNumActivations (const TensorInfo &inputInfo)

template<typename TensorHandleType = RefTensorHandle>
const TensorInfo &	GetTensorInfo (const ITensorHandle *tensorHandle)
	float32 helpers More...

template<typename DataType , typename PayloadType >
const DataType *	GetInputTensorData (unsigned int idx, const PayloadType &data)

template<typename DataType >
DataType *	GetOutputTensorData (ITensorHandle *tensorHandle)

template<typename PayloadType >
const float *	GetInputTensorDataFloat (unsigned int idx, const PayloadType &data)

template<typename PayloadType >
float *	GetOutputTensorDataFloat (unsigned int idx, const PayloadType &data)

template<typename PayloadType >
const Half *	GetInputTensorDataHalf (unsigned int idx, const PayloadType &data)

template<typename PayloadType >
Half *	GetOutputTensorDataHalf (unsigned int idx, const PayloadType &data)

template<typename PayloadType >
const BFloat16 *	GetInputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)

template<typename PayloadType >
BFloat16 *	GetOutputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)

template<typename T >
std::vector< float >	Dequantize (const T *quant, const TensorInfo &info)
	u8 helpers More...

template<typename T >
void	Dequantize (const T *inputData, float *outputData, const TensorInfo &info)

void	Quantize (uint8_t *quant, const float *dequant, const TensorInfo &info)

void	Resize (Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)

std::vector< unsigned int >	ReverseGetMultIdx (const unsigned int idx, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)

unsigned int	ReverseGetFlatIdx (const std::vector< unsigned int > &idxList, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)

unsigned int	ReverseRelocateIdx (unsigned int idx, unsigned int inputRank, std::vector< bool > &axisFlag, std::vector< unsigned int > &dimSize, std::vector< unsigned int > &elementNumInner)

void	ReverseV2 (const TensorInfo &inputInfo, const TensorInfo &axisInfo, Decoder< float > &inputDecoder, Decoder< int > &axisDecoder, Encoder< float > &outputEncoder)

void	Slice (const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)

void	Softmax (Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
	Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. More...

unsigned int	GetOffset (const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)

void	SpaceToBatchNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)

void	SpaceToDepth (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)

void	Split (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)

template<typename DataType >
void	Splitter (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)

void	Stack (const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)

void	StridedSlice (const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)

std::vector< uint32_t >	IndexToCoordinates (std::vector< uint32_t > &shape, uint32_t index)

uint32_t	CoordinatesToIndex (TensorShape &shape, std::vector< uint32_t > &coordinates)

void	Tile (const TileDescriptor &params, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)

void	TransposeConvolution2dImpl (const TransposeConvolution2dDescriptor &descriptor, const TensorShape &inputShape, Decoder< float > &inputDecoder, const TensorShape &outputShape, Encoder< float > &outputEncoder, const TensorShape &weightsShape, Decoder< float > &weightsDecoder, Decoder< float > *biasesDecoder)

constexpr const char *	TosaRefBackendId ()

constexpr const char *	TosaRefTensorHandleFactoryId ()

bool	TosaRefPreCompiledWorkloadValidate (std::string *)

Variables
constexpr unsigned int	MaxNumOfTensorDimensions = 5U

constexpr unsigned int	LOWEST_CAPTURE_PERIOD = 10000u
	The lowest performance data capture interval we support is 10 milliseconds. More...

constexpr unsigned int	EXPIRE_RATE = 3U
	Variable to control expire rate of priority queue. More...

constexpr std::size_t	g_ProfilingEventCountHint = 1024

constexpr bool	g_WriteProfilingEventSequence = true

constexpr bool	g_AggregateProfilingEventsByInference = true

constexpr bool	g_WriteReportToStdOutOnProfilerDestruction = false

thread_local IProfiler *	tl_Profiler = nullptr

constexpr size_t	wordSize = sizeof(size_t) * 8

const BackendCapabilities	gpuFsaCapabilities ("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })

const BackendCapabilities	cpuAccCapabilities ("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })

const std::set< armnn::LayerType >	paddingRequiredLayers

const BackendCapabilities	cpuRefCapabilities ("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })

const std::set< armnn::BackendCapability >	oldCpuRefCapabilities

Detailed Description

Optional is a drop-in replacement for std::optional until we migrate to C++17.

Create pages for each tool so they appear nicely in the doxygen tree-view.

SPDX-License-Identifier: MIT

Subpages are not listed there. Also we can overwrite the page name this way.

Subpages are not listed there.

Note: The parser, serializer and deserializer pages are created in 01_parsers.dox or 02_deserializer_serializer.dox

Only a subset of the optional features are implemented that we intend to use in ArmNN. There are two distinct implementations here:

1. for normal constructable/destructable types and reference types
2. for reference types

The std::optional features we support are:

has_value() and operator bool() to tell if the optional has a value
value() returns a reference to the held object

Typedef Documentation

◆ ACLMemManagerOnDemand

using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>

Definition at line 22 of file NeonFullyConnectedWorkload.cpp.

◆ AdditionalInfoObjectPtr

using AdditionalInfoObjectPtr = std::shared_ptr<void>

Definition at line 228 of file Layer.hpp.

◆ BackendCapabilities

using BackendCapabilities = BackendOptions

Definition at line 19 of file BackendOptions.hpp.

◆ BackendIdSet

using BackendIdSet = std::unordered_set<BackendId>

Definition at line 193 of file BackendId.hpp.

◆ BackendIdVector

using BackendIdVector = std::vector<BackendId>

Definition at line 192 of file BackendId.hpp.

◆ BackendsMap

using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal> >

Definition at line 282 of file Network.hpp.

◆ BaseFloat32ComparisonWorkload

using BaseFloat32ComparisonWorkload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>

Definition at line 234 of file Workload.hpp.

◆ BaseUint8ComparisonWorkload

using BaseUint8ComparisonWorkload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean>

Definition at line 239 of file Workload.hpp.

◆ BFloat16ToFloat32Workload

using BFloat16ToFloat32Workload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32>

Definition at line 244 of file Workload.hpp.

◆ BindingPointInfo

using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>

Definition at line 276 of file Tensor.hpp.

◆ BooleanWorkload

using BooleanWorkload = TypedWorkload<QueueDescriptor, armnn::DataType::Boolean>

Definition at line 229 of file Workload.hpp.

◆ CompiledBlobDeleter

typedef std::function< void(const void *)> CompiledBlobDeleter

Definition at line 342 of file INetwork.hpp.

◆ CompiledBlobPtr

typedef std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr

Definition at line 343 of file INetwork.hpp.

◆ ConcatDescriptor

using ConcatDescriptor = OriginsDescriptor

Definition at line 59 of file DescriptorsFwd.hpp.

◆ Coordinates

using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 15 of file InternalTypes.hpp.

◆ CopyAndImportFactoryPairs

using CopyAndImportFactoryPairs = std::map<ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId>

Definition at line 20 of file TensorHandleFactoryRegistry.hpp.

◆ DebugCallbackFunction

using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>

Define the type of callback for the Debug layer to call.

Parameters

guid	- guid of layer connected to the input of the Debug layer
slotIndex	- index of the output slot connected to the input of the Debug layer
tensorHandle	- TensorHandle for the input tensor to the Debug layer

Definition at line 398 of file Types.hpp.

◆ DepthToSpaceDescriptor

typedef SpaceToDepthDescriptor DepthToSpaceDescriptor

A DepthToSpaceDescriptor for the DepthToSpaceLayer.

Definition at line 1099 of file Descriptors.hpp.

◆ Dimensions

using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 16 of file InternalTypes.hpp.

◆ DynamicBackendPtr

using DynamicBackendPtr = std::unique_ptr<DynamicBackend>

Definition at line 54 of file DynamicBackend.hpp.

◆ FactoryId

typedef ITensorHandleFactory::FactoryId FactoryId

Definition at line 12 of file MockTensorHandleFactory.cpp.

◆ Float16ToFloat32Workload

using Float16ToFloat32Workload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32>

Definition at line 254 of file Workload.hpp.

◆ Float32ToBFloat16Workload

using Float32ToBFloat16Workload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16>

Definition at line 249 of file Workload.hpp.

◆ Float32ToFloat16Workload

using Float32ToFloat16Workload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16>

Definition at line 259 of file Workload.hpp.

◆ Float32Workload

using Float32Workload = TypedWorkload<QueueDescriptor, armnn::DataType::Float32>

Definition at line 220 of file Workload.hpp.

◆ FloatWorkload

using FloatWorkload = TypedWorkload<QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32>

Definition at line 217 of file Workload.hpp.

◆ Half

typedef half_float::half Half

Definition at line 22 of file Half.hpp.

◆ HighResolutionClock

using HighResolutionClock = std::chrono::high_resolution_clock::time_point

Define a timer and associated inference ID for recording execution times.

Definition at line 401 of file Types.hpp.

◆ IBackendContextUniquePtr

using IBackendContextUniquePtr = std::unique_ptr<IBackendContext>

Definition at line 34 of file IBackendContext.hpp.

◆ IBackendInternalUniquePtr

typedef std::unique_ptr< IBackendInternal > IBackendInternalUniquePtr

Definition at line 32 of file BackendRegistry.hpp.

◆ IBackendSharedPtr

using IBackendSharedPtr = std::shared_ptr<IBackend>

Definition at line 282 of file Types.hpp.

◆ IBackendUniquePtr

using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>

Definition at line 283 of file Types.hpp.

◆ IGpuAccTunedParametersPtr

using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>

The following API is replaced by the backend options API.

Definition at line 300 of file IRuntime.hpp.

◆ IInitialiseProfilingService

using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

Definition at line 28 of file Runtime.hpp.

◆ ILayerSupportSharedPtr

using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>

Definition at line 40 of file ILayerSupport.hpp.

◆ IMemoryManagerUniquePtr

using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>

Definition at line 24 of file IMemoryManager.hpp.

◆ ImportedInputId

using ImportedInputId = unsigned int

Definition at line 310 of file Types.hpp.

◆ ImportedOutputId

using ImportedOutputId = unsigned int

Definition at line 311 of file Types.hpp.

◆ INetworkPtr

using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>

Definition at line 339 of file INetwork.hpp.

◆ InferenceTimingPair

using InferenceTimingPair = std::pair<HighResolutionClock, HighResolutionClock>

Definition at line 402 of file Types.hpp.

◆ InputQueueDescriptor

using InputQueueDescriptor = MemCopyQueueDescriptor

Definition at line 91 of file WorkloadData.hpp.

◆ InputTensors

using InputTensors = std::vector<std::pair<LayerBindingId, class ConstTensor> >

Definition at line 394 of file Tensor.hpp.

◆ Int32Workload

using Int32Workload = TypedWorkload<QueueDescriptor, armnn::DataType::Signed32>

Definition at line 226 of file Workload.hpp.

◆ IOptimizedNetworkPtr

using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>

Definition at line 340 of file INetwork.hpp.

◆ IReportStructure

using IReportStructure = arm::pipe::IReportStructure

Definition at line 27 of file Runtime.hpp.

◆ IRuntimePtr

using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>

Definition at line 41 of file IRuntime.hpp.

◆ LayerBindingId

using LayerBindingId = int

Type of identifiers for bindable layers (inputs, outputs).

Definition at line 309 of file Types.hpp.

◆ LayerPriority

using LayerPriority = unsigned int

Definition at line 227 of file Layer.hpp.

◆ LayerTypeOf

using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type

Definition at line 94 of file LayersFwd.hpp.

◆ LoadedNetworks

using LoadedNetworks = std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork> >

Definition at line 26 of file Runtime.hpp.

◆ LogSoftmaxDescriptor

typedef SoftmaxDescriptor LogSoftmaxDescriptor

A LogSoftmaxDescriptor for the LogSoftmaxLayer.

Definition at line 196 of file Descriptors.hpp.

◆ MemoryOptimizerStrategiesMapRef

using MemoryOptimizerStrategiesMapRef = std::unordered_map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy> >

Definition at line 33 of file BackendRegistry.hpp.

◆ MemorySourceFlags

using MemorySourceFlags = unsigned int

Definition at line 15 of file MemorySources.hpp.

◆ MergerDescriptor

using MergerDescriptor = OriginsDescriptor

MergerDescriptor is deprecated, use ConcatDescriptor instead.

Definition at line 63 of file DescriptorsFwd.hpp.

◆ MergerQueueDescriptor

using MergerQueueDescriptor = ConcatQueueDescriptor

Definition at line 149 of file WorkloadData.hpp.

◆ ModelOptions

using ModelOptions = std::vector<BackendOptions>

Definition at line 18 of file BackendOptions.hpp.

◆ NetworkId

typedef int NetworkId

Definition at line 35 of file IRuntime.hpp.

◆ NetworkImplPtr

using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>

Definition at line 29 of file Network.hpp.

◆ NetworkOptions

using NetworkOptions = std::vector<BackendOptions>

Definition at line 16 of file BackendOptions.hpp.

◆ OutputQueueDescriptor

using OutputQueueDescriptor = MemCopyQueueDescriptor

Definition at line 92 of file WorkloadData.hpp.

◆ OutputTensors

using OutputTensors = std::vector<std::pair<LayerBindingId, class Tensor> >

Definition at line 395 of file Tensor.hpp.

◆ ParameterStringifyFunction

using ParameterStringifyFunction = std::function<void(const std::string& name, const std::string& value)>

Definition at line 14 of file SerializeLayerParameters.hpp.

◆ PreCompiledObjectDeleter

using PreCompiledObjectDeleter = std::function<void(const void*)>

Definition at line 19 of file PreCompiledLayer.hpp.

◆ PreCompiledObjectPtr

using PreCompiledObjectPtr = std::unique_ptr<void, PreCompiledObjectDeleter>

Definition at line 20 of file PreCompiledLayer.hpp.

◆ RefAdditionWorkload

using RefAdditionWorkload = RefElementwiseWorkload<std::plus<DataType>, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute>

Definition at line 40 of file RefElementwiseWorkload.hpp.

◆ RefDebugBFloat16Workload

using RefDebugBFloat16Workload = RefDebugWorkload<DataType::BFloat16>

Definition at line 42 of file RefDebugWorkload.hpp.

◆ RefDebugFloat16Workload

using RefDebugFloat16Workload = RefDebugWorkload<DataType::Float16>

Definition at line 43 of file RefDebugWorkload.hpp.

◆ RefDebugFloat32Workload

using RefDebugFloat32Workload = RefDebugWorkload<DataType::Float32>

Definition at line 44 of file RefDebugWorkload.hpp.

◆ RefDebugQAsymmS8Workload

using RefDebugQAsymmS8Workload = RefDebugWorkload<DataType::QAsymmS8>

Definition at line 46 of file RefDebugWorkload.hpp.

◆ RefDebugQAsymmU8Workload

using RefDebugQAsymmU8Workload = RefDebugWorkload<DataType::QAsymmU8>

Definition at line 45 of file RefDebugWorkload.hpp.

◆ RefDebugQSymmS16Workload

using RefDebugQSymmS16Workload = RefDebugWorkload<DataType::QSymmS16>

Definition at line 47 of file RefDebugWorkload.hpp.

◆ RefDebugQSymmS8Workload

using RefDebugQSymmS8Workload = RefDebugWorkload<DataType::QSymmS8>

Definition at line 48 of file RefDebugWorkload.hpp.

◆ RefDebugSigned32Workload

using RefDebugSigned32Workload = RefDebugWorkload<DataType::Signed32>

Definition at line 49 of file RefDebugWorkload.hpp.

◆ RefDebugSigned64Workload

using RefDebugSigned64Workload = RefDebugWorkload<DataType::Signed64>

Definition at line 50 of file RefDebugWorkload.hpp.

◆ RefDivisionWorkload

using RefDivisionWorkload = RefElementwiseWorkload<std::divides<DataType>, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute>

Definition at line 58 of file RefElementwiseWorkload.hpp.

◆ RefMaximumWorkload

using RefMaximumWorkload = RefElementwiseWorkload<armnn::maximum<DataType>, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute>

Definition at line 64 of file RefElementwiseWorkload.hpp.

◆ RefMinimumWorkload

using RefMinimumWorkload = RefElementwiseWorkload<armnn::minimum<DataType>, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute>

Definition at line 70 of file RefElementwiseWorkload.hpp.

◆ RefMultiplicationWorkload

using RefMultiplicationWorkload = RefElementwiseWorkload<std::multiplies<DataType>, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute>

Definition at line 52 of file RefElementwiseWorkload.hpp.

◆ RefPermuteBFloat16Workload

using RefPermuteBFloat16Workload = RefPermuteWorkload<DataType::BFloat16>

Definition at line 27 of file RefPermuteWorkload.hpp.

◆ RefPermuteFloat16Workload

using RefPermuteFloat16Workload = RefPermuteWorkload<DataType::Float16>

Definition at line 28 of file RefPermuteWorkload.hpp.

◆ RefPermuteFloat32Workload

using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>

Definition at line 29 of file RefPermuteWorkload.hpp.

◆ RefPermuteQAsymm8Workload

using RefPermuteQAsymm8Workload = RefPermuteWorkload<DataType::QAsymmU8>

Definition at line 31 of file RefPermuteWorkload.hpp.

◆ RefPermuteQAsymmS8Workload

using RefPermuteQAsymmS8Workload = RefPermuteWorkload<DataType::QAsymmS8>

Definition at line 30 of file RefPermuteWorkload.hpp.

◆ RefPermuteQSymm16Workload

using RefPermuteQSymm16Workload = RefPermuteWorkload<DataType::QSymmS16>

Definition at line 32 of file RefPermuteWorkload.hpp.

◆ RefSubtractionWorkload

using RefSubtractionWorkload = RefElementwiseWorkload<std::minus<DataType>, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute>

Definition at line 46 of file RefElementwiseWorkload.hpp.

◆ RefTransposeBFloat16Workload

using RefTransposeBFloat16Workload = RefTransposeWorkload<DataType::BFloat16>

Definition at line 27 of file RefTransposeWorkload.hpp.

◆ RefTransposeFloat16Workload

using RefTransposeFloat16Workload = RefTransposeWorkload<DataType::Float16>

Definition at line 28 of file RefTransposeWorkload.hpp.

◆ RefTransposeFloat32Workload

using RefTransposeFloat32Workload = RefTransposeWorkload<DataType::Float32>

Definition at line 29 of file RefTransposeWorkload.hpp.

◆ RefTransposeQAsymm8Workload

using RefTransposeQAsymm8Workload = RefTransposeWorkload<DataType::QAsymmU8>

Definition at line 31 of file RefTransposeWorkload.hpp.

◆ RefTransposeQAsymmS8Workload

using RefTransposeQAsymmS8Workload = RefTransposeWorkload<DataType::QAsymmS8>

Definition at line 30 of file RefTransposeWorkload.hpp.

◆ RefTransposeQSymm16Workload

using RefTransposeQSymm16Workload = RefTransposeWorkload<DataType::QSymmS16>

Definition at line 32 of file RefTransposeWorkload.hpp.

◆ ResolveType

using ResolveType = typename ResolveTypeImpl<DT>::Type

Definition at line 79 of file ResolveType.hpp.

◆ SplitterDescriptor

using SplitterDescriptor = ViewsDescriptor

Definition at line 64 of file DescriptorsFwd.hpp.

◆ TensorInfos

using TensorInfos = std::vector<TensorInfo>

Definition at line 152 of file BackendHelper.cpp.

◆ Uint8ToFloat32Workload

using Uint8ToFloat32Workload = MultiTypedWorkload<QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32>

Definition at line 264 of file Workload.hpp.

◆ Uint8Workload

using Uint8Workload = TypedWorkload<QueueDescriptor, armnn::DataType::QAsymmU8>

Definition at line 223 of file Workload.hpp.

◆ UnidirectionalSequenceLstmDescriptor

typedef LstmDescriptor UnidirectionalSequenceLstmDescriptor

Definition at line 1169 of file Descriptors.hpp.

◆ WorkloadQueue

using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >

Definition at line 13 of file ExecutionFrame.hpp.

Enumeration Type Documentation

◆ ActivationFunction

enum ActivationFunction

strong

Enumerator
Sigmoid
TanH
Linear
ReLu
BoundedReLu	min(a, max(b, input)) ReLu1 & ReLu6.
SoftReLu
LeakyReLu
Abs
Sqrt
Square
Elu
HardSwish
Gelu

Definition at line 86 of file Types.hpp.

 {
     Sigmoid     = 0,
     TanH        = 1,
     Linear      = 2,
     ReLu        = 3,
     BoundedReLu = 4, ///< min(a, max(b, input)) ReLu1 & ReLu6.
     SoftReLu    = 5,
     LeakyReLu   = 6,
     Abs         = 7,
     Sqrt        = 8,
     Square      = 9,
     Elu         = 10,
     HardSwish   = 11,
     Gelu        = 12
 };

◆ ArgMinMaxFunction

enum ArgMinMaxFunction

strong

Enumerator
Min
Max

Definition at line 103 of file Types.hpp.

 {
     Min = 0,
     Max = 1
 };

◆ BackendCapability

enum BackendCapability : uint32_t

strong

BackendCapability class.

Enumerator
NonConstWeights	Constant weights can be accessed through the descriptors. On the other hand, non-const weights can be accessed through inputs.
AsyncExecution	Asynchronous Execution.

Definition at line 286 of file Types.hpp.

                              : uint32_t
 {
     /// Constant weights can be accessed through the descriptors,
     /// On the other hand, non-const weights can be accessed through inputs.
     NonConstWeights,
  
     /// Asynchronous Execution.
     AsyncExecution,
  
     // add new enum values here
 };

◆ BinaryOperation

enum BinaryOperation

strong

Enumerator
Add
Div
Maximum
Minimum
Mul
Sub
SqDiff
Power

Definition at line 138 of file Types.hpp.

 {
     Add     = 0,
     Div     = 1,
     Maximum = 2,
     Minimum = 3,
     Mul     = 4,
     Sub     = 5,
     SqDiff  = 6,
     Power   = 7
 };

◆ BoostLogSeverityMapping

enum BoostLogSeverityMapping

strong

Enumerator
trace
debug
info
warning
error
fatal

Definition at line 196 of file Logging.hpp.

 {
     trace,
     debug,
     info,
     warning,
     error,
     fatal
 };

◆ CapabilityClass

enum CapabilityClass

strong

Capability class to calculate in the GetCapabilities function, so that only the capability in scope is chosen for calculation.

Enumerator
PaddingRequired
FallbackImportDisabled
CapabilityClassMax

Definition at line 24 of file ITensorHandleFactory.hpp.

 {
     PaddingRequired = 1,
     FallbackImportDisabled = 2,
  
     // add new enum values here
  
     CapabilityClassMax = 254
 };

◆ ComparisonOperation

enum ComparisonOperation

strong

Enumerator
Equal
Greater
GreaterOrEqual
Less
LessOrEqual
NotEqual

Definition at line 109 of file Types.hpp.

 {
     Equal          = 0,
     Greater        = 1,
     GreaterOrEqual = 2,
     Less           = 3,
     LessOrEqual    = 4,
     NotEqual       = 5
 };

◆ Compute

enum Compute

strong

The Compute enum is now deprecated and it is now being replaced by BackendId.

Enumerator
Undefined
CpuRef	CPU Execution: Reference C++ kernels.
CpuAcc	CPU Execution: NEON: ArmCompute.
GpuAcc	GPU Execution: OpenCL: ArmCompute.

Definition at line 21 of file BackendId.hpp.

 {
     Undefined = 0,
     /// CPU Execution: Reference C++ kernels
     CpuRef    = 1,
     /// CPU Execution: NEON: ArmCompute
     CpuAcc    = 2,
     /// GPU Execution: OpenCL: ArmCompute
     GpuAcc    = 3
 };

◆ DataLayout

enum DataLayout

strong

Enumerator
NCHW
NHWC
NDHWC
NCDHW

Definition at line 62 of file Types.hpp.

 {
     NCHW = 1,
     NHWC = 2,
     NDHWC = 3,
     NCDHW = 4
 };

◆ DataType

enum DataType

strong

Enumerator
Float16
Float32
QAsymmU8
Signed32
Boolean
QSymmS16
QSymmS8
QAsymmS8
BFloat16
Signed64

Definition at line 48 of file Types.hpp.

 {
     Float16  = 0,
     Float32  = 1,
     QAsymmU8 = 2,
     Signed32 = 3,
     Boolean  = 4,
     QSymmS16 = 5,
     QSymmS8  = 6,
     QAsymmS8 = 7,
     BFloat16 = 8,
     Signed64 = 9,
 };

◆ Dimensionality

enum Dimensionality

strong

Enumerator
NotSpecified
Specified
Scalar

Definition at line 172 of file Types.hpp.

 {
     NotSpecified = 0,
     Specified    = 1,
     Scalar       = 2
 };

◆ EdgeStrategy

enum EdgeStrategy

strong

Enumerator
Undefined	No strategy has been defined. Used internally to verify integrity of optimizations.
DirectCompatibility	Destination backend can work directly with tensors on source backend.
ExportToTarget	Source backends tensor data can be exported to destination backend tensor without copy.
CopyToTarget	Copy contents from source backend tensor to destination backend tensor.

Definition at line 104 of file ITensorHandleFactory.hpp.

 {
     Undefined,              /// No strategy has been defined. Used internally to verify integrity of optimizations.
     DirectCompatibility,    /// Destination backend can work directly with tensors on source backend.
     ExportToTarget,         /// Source backends tensor data can be exported to destination backend tensor without copy.
     CopyToTarget            /// Copy contents from source backend tensor to destination backend tensor.
 };

◆ FusedKernelType

enum FusedKernelType

strong

Enumerator
AddMulAdd

Definition at line 266 of file Types.hpp.

 {
     AddMulAdd  = 0
 };

◆ GraphEvent

enum GraphEvent

strong

Enumerator
LayerAdded
LayerErased

Definition at line 12 of file IGraphObservable.hpp.

 {
     LayerAdded,
     LayerErased
 };

◆ JsonObjectType

enum JsonObjectType

strong

Enumerator
Measurement
Event
ExecObjectDesc

Definition at line 20 of file JsonPrinter.hpp.

 {
     Measurement,
     Event,
     ExecObjectDesc
 };

◆ LayerType

enum LayerType

strong

When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.

Enumerator
X
Activation
Addition
ArgMinMax
BatchNormalization
BatchToSpaceNd
Comparison
Concat
Constant
ConvertFp16ToFp32
ConvertFp32ToFp16
Convolution2d
Debug
DepthToSpace
DepthwiseConvolution2d
Dequantize
DetectionPostProcess
Division
ElementwiseUnary
FakeQuantization
Fill
Floor
FullyConnected
Gather
Input
InstanceNormalization
L2Normalization
LogicalBinary
LogSoftmax
Lstm
QLstm
Map
Maximum
Mean
MemCopy
MemImport
Merge
Minimum
Multiplication
Normalization
Output
Pad
Permute
Pooling2d
PreCompiled
Prelu
Quantize
QuantizedLstm
Reshape
Rank
Resize
Reduce
Slice
Softmax
SpaceToBatchNd
SpaceToDepth
Splitter
Stack
StandIn
StridedSlice
Subtraction
Switch
Transpose
TransposeConvolution2d
Unmap
Cast
Shape
UnidirectionalSequenceLstm
ChannelShuffle
Convolution3d
Pooling3d
GatherNd
BatchMatMul
ElementwiseBinary
ReverseV2
Tile
Fused
BroadcastTo
FirstLayer
LastLayer

Definition at line 491 of file Types.hpp.

 {
 #define X(name) name,
     LIST_OF_LAYER_TYPE
 #undef X
     FirstLayer = Activation,
     LastLayer = BroadcastTo
 };

◆ LogicalBinaryOperation

enum LogicalBinaryOperation

strong

Enumerator
LogicalAnd
LogicalOr

Definition at line 119 of file Types.hpp.

 {
     LogicalAnd = 0,
     LogicalOr  = 1
 };

◆ LogSeverity

enum LogSeverity

strong

Enumerator
Trace
Debug
Info
Warning
Error
Fatal

Definition at line 13 of file Utils.hpp.

 {
     Trace,
     Debug,
     Info,
     Warning,
     Error,
     Fatal
 };

◆ MemBlockStrategyType

enum MemBlockStrategyType

strong

Enumerator
SingleAxisPacking
MultiAxisPacking

Definition at line 253 of file Types.hpp.

 {
     // MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
     // In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
     // equivalent to blob or pooling memory management.
     SingleAxisPacking  = 0,
  
     // MemBlocks can be packed on either Y or X axis but cannot overlap on both.
     // In other words MemBlocks with overlapping lifetimes can use the same MemBin,
     // equivalent to offset or slab memory management.
     MultiAxisPacking  = 1
 };

◆ MemorySource

enum MemorySource : uint32_t

strong

Define the Memory Source to reduce copies.

Enumerator
Undefined
Malloc
DmaBuf
DmaBufProtected
Gralloc

Definition at line 244 of file Types.hpp.

                         : uint32_t
 {
     Undefined = 0,
     Malloc = 1,
     DmaBuf = 2,
     DmaBufProtected = 4,
     Gralloc = 8
 };

◆ NormalizationAlgorithmChannel

enum NormalizationAlgorithmChannel

strong

Enumerator
Across
Within

Definition at line 207 of file Types.hpp.

 {
     Across = 0,
     Within = 1
 };

◆ NormalizationAlgorithmMethod

enum NormalizationAlgorithmMethod

strong

Enumerator
LocalBrightness	Krizhevsky 2012: Local Brightness Normalization.
LocalContrast	Jarrett 2009: Local Contrast Normalization.

Definition at line 213 of file Types.hpp.

 {
     /// Krichevsky 2012: Local Brightness Normalization
     LocalBrightness = 0,
     /// Jarret 2009: Local Contrast Normalization
     LocalContrast = 1
 };

◆ OutputShapeRounding

enum OutputShapeRounding

strong

Enumerator
Floor
Ceiling

Definition at line 221 of file Types.hpp.

 {
     Floor       = 0,
     Ceiling     = 1
 };

◆ PaddingMethod

enum PaddingMethod

strong

The padding method modifies the output of pooling layers.

In both supported methods, the values are ignored (they are not even zeroes, which would make a difference for max pooling a tensor with negative values). The difference between IgnoreValue and Exclude is that the former counts the padding fields in the divisor of Average and L2 pooling, while Exclude does not.

Enumerator
IgnoreValue	The padding fields count, but are ignored.
Exclude	The padding fields don't count and are ignored.

Definition at line 188 of file Types.hpp.

 {
     /// The padding fields count, but are ignored
     IgnoreValue = 0,
     /// The padding fields don't count and are ignored
     Exclude     = 1
 };

◆ PaddingMode

enum PaddingMode

strong

The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).

Enumerator
Constant
Reflect
Symmetric

Definition at line 200 of file Types.hpp.

 {
     Constant  = 0,
     Reflect   = 1,
     Symmetric = 2
 };

◆ PoolingAlgorithm

enum PoolingAlgorithm

strong

Enumerator
Max
Average
L2

Definition at line 150 of file Types.hpp.

 {
     Max     = 0,
     Average = 1,
     L2      = 2
 };

◆ ProfilingDetailsMethod

enum ProfilingDetailsMethod

strong

Define the behaviour of the internal profiler when outputting network details.

Enumerator
Undefined
DetailsWithEvents
DetailsOnly

Definition at line 71 of file Types.hpp.

 {
     Undefined = 0,
     DetailsWithEvents = 1,
     DetailsOnly = 2
 };

◆ QosExecPriority

enum QosExecPriority

strong

Enumerator
Low
Medium
High

Definition at line 79 of file Types.hpp.

 {
     Low    = 0,
     Medium = 1,
     High   = 2
 };

◆ ReduceOperation

enum ReduceOperation

strong

Enumerator
Sum
Max
Mean
Min
Prod

Definition at line 157 of file Types.hpp.

 {
     Sum  = 0,
     Max  = 1,
     Mean = 2,
     Min  = 3,
     Prod = 4
 };

◆ ResizeMethod

enum ResizeMethod

strong

Enumerator
Bilinear
NearestNeighbor

Definition at line 166 of file Types.hpp.

 {
     Bilinear        = 0,
     NearestNeighbor = 1
 };

◆ ShapeInferenceMethod

enum ShapeInferenceMethod

strong

The ShapeInferenceMethod modifies how the output shapes are treated.

When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer and any mismatch is reported. When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or dimension sizes are not specified explicitly but can be calculated from the inputs.

Enumerator
ValidateOnly	Validate all output shapes.
InferAndValidate	Infer missing output shapes and validate all output shapes.

Definition at line 235 of file Types.hpp.

 {
     /// Validate all output shapes: the shapes are inferred from the input parameters of the layer
     /// and any mismatch is reported
     ValidateOnly     = 0,
     /// Infer missing output shapes and validate all output shapes: (1) infer the output shape from
     /// the inputs, then (2) validate the shapes as in ValidateOnly
     InferAndValidate = 1
 };

◆ Status

enum Status

strong

enumeration

Enumerator
Success
Failure

Definition at line 42 of file Types.hpp.

 {
     // Enumerators of Status; Success is explicitly 0 and Failure is explicitly 1.
     Success = 0,
     Failure = 1
 };

◆ TuningLevel

enum TuningLevel

strong

Enumerator
None
Rapid
Normal
Exhaustive

Definition at line 18 of file ArmComputeTuningUtils.hpp.

 {
     // Enumerators of TuningLevel. No explicit values are given, so they take the default
     // consecutive values 0 (None) to 3 (Exhaustive) in declaration order.
     None,
     Rapid,
     Normal,
     Exhaustive
 };

◆ UnaryOperation

enum UnaryOperation

strong

Enumerator
Abs
Exp
Sqrt
Rsqrt
Neg
LogicalNot
Log
Sin
Ceil

Definition at line 125 of file Types.hpp.

 {
     // Enumerators of UnaryOperation; each one is given an explicit value, starting at 0.
     Abs        = 0,
     Exp        = 1,
     Sqrt       = 2,
     Rsqrt      = 3,
     Neg        = 4,
     LogicalNot = 5,
     Log        = 6,
     Sin        = 7,
     Ceil       = 8
 };

Function Documentation

◆ Activation() [1/2]

void Activation	(	Decoder< float > &	in,
		Encoder< float > &	out,
		const TensorInfo &	tensorInfo,
		ActivationFunction	function,
		float	a,
		float	b
	)

Definition at line 102 of file Activation.cpp.

 {
     unsigned int numElements = tensorInfo.GetNumElements();
  
     for (unsigned int i = 0; i < numElements; i++)
     {
         out.Set(Activation(in.Get(), function, a, b));
         ++in;
         ++out;
     }
     in -= numElements;
     out -= numElements;
 }

References Activation(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().

◆ Activation() [2/2]

float Activation	(	float	in,
		ActivationFunction	function,
		float	a,
		float	b
	)

Definition at line 13 of file Activation.cpp.

 {
     float output;
  
     // Compute the result of the activation function.
     switch (function)
     {
         case ActivationFunction::Linear:
         {
             output = a * in + b;
             break;
         }
         case ActivationFunction::Sigmoid:
         {
             output = 1.f / (1.f + expf(-in));
             break;
         }
         case ActivationFunction::ReLu:
         {
             output = std::max(0.f, in);
             break;
         }
         case ActivationFunction::BoundedReLu:
         {
             output = std::min(a, std::max(b, in));
             break;
         }
         case ActivationFunction::SoftReLu:
         {
             output = logf(1.0f + expf(in));
             break;
         }
         case ActivationFunction::LeakyReLu:
         {
             output = in > 0.0f ? in : (in * a);
             break;
         }
         case ActivationFunction::Abs:
         {
             output = in < 0 ? -in : in;
             break;
         }
         case ActivationFunction::Sqrt:
         {
             output = sqrtf(in);
             break;
         }
         case ActivationFunction::Square:
         {
             output = in * in;
             break;
         }
         case ActivationFunction::TanH:
         {
             output = a * tanhf(b * in);
             break;
         }
         case ActivationFunction::Elu:
         {
             output = (in >= 0) ? in : a * (expf(in) - 1);
             break;
         }
         case ActivationFunction::HardSwish:
         {
             // hard_swish(x) = x * relu6(x+3) / 6
             // relu6(x) = min(max(x,0),6)
             output = in * (std::min(std::max((in + 3),0.0f),6.0f)) / 6;
             break;
         }
         case ActivationFunction::Gelu:
         {
             // gelu(x) = x * 1/2 * (1 + erf(x / sqrt(2))),
             // where erf is Gaussian error function
             output = in * (0.5f * (1.0f + erff(static_cast<float>(in / std::sqrt(2)))));
             break;
         }
         default:
         {
             throw InvalidArgumentException("Unsupported activation function");
         }
     }
  
     return output;
 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by Activation(), and LstmImpl().

◆ AllocateOutputData()

void armnn::AllocateOutputData	(	unsigned int	numOutput,
		unsigned int	numSelected,
		const std::vector< float > &	boxCorners,
		const std::vector< unsigned int > &	outputIndices,
		const std::vector< unsigned int > &	selectedBoxes,
		const std::vector< unsigned int > &	selectedClasses,
		const std::vector< float > &	selectedScores,
		float *	detectionBoxes,
		float *	detectionScores,
		float *	detectionClasses,
		float *	numDetections
	)

Definition at line 103 of file DetectionPostProcess.cpp.

 {
     for (unsigned int i = 0; i < numOutput; ++i)
         {
             unsigned int boxIndex = i * 4;
             if (i < numSelected)
             {
                 unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
                 detectionScores[i] = selectedScores[outputIndices[i]];
                 detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
                 detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
                 detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
                 detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
                 detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
             }
             else
             {
                 detectionScores[i] = 0.0f;
                 detectionClasses[i] = 0.0f;
                 detectionBoxes[boxIndex] = 0.0f;
                 detectionBoxes[boxIndex + 1] = 0.0f;
                 detectionBoxes[boxIndex + 2] = 0.0f;
                 detectionBoxes[boxIndex + 3] = 0.0f;
             }
         }
         numDetections[0] = armnn::numeric_cast<float>(numSelected);
 }

Referenced by DetectionPostProcess().

◆ AllTypesAreEqualImpl() [1/2]

bool armnn::AllTypesAreEqualImpl	(	T	t1,
		T	t2,
		Rest...	rest
	)

Definition at line 65 of file LayerSupportRules.hpp.

 {
     static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
  
     return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
 }

References AllTypesAreEqualImpl().

◆ AllTypesAreEqualImpl() [2/2]

bool armnn::AllTypesAreEqualImpl ( T )

Definition at line 59 of file LayerSupportRules.hpp.

 {
     // Single-argument overload: there is nothing to compare the argument against, so the result is true.
     return true;
 }

Referenced by AllTypesAreEqualImpl(), and TypesAreEqual::TypesAreEqual().

◆ Append() [1/2]

void armnn::Append	(	Optimizer::Optimizations &	optimizations,
		Front &&	front,
		Others &&...	others
	)

Definition at line 36 of file Optimizer.hpp.

 {
     // Append `front` on its own first, then recurse on the remaining optimizations; every argument is
     // passed on with std::forward.
     Append<Front>(optimizations, std::forward<Front>(front));
     Append<Others...>(optimizations, std::forward<Others>(others)...);
 };

References Append().

◆ Append() [2/2]

void armnn::Append	(	Optimizer::Optimizations &	optimizations,
		T &&	optimization
	)

Definition at line 30 of file Optimizer.hpp.

 {
     // Heap-allocate a T constructed from `optimization` and pass the raw pointer to emplace_back.
     // `optimization` is used as a named lvalue here (no std::forward), so T is built from an lvalue.
     // NOTE(review): presumably the container's element type takes ownership of the pointer — confirm.
     // If emplace_back itself throws, the newly allocated object would not be released.
     optimizations.emplace_back(new T(optimization));
 };

Referenced by Append(), and MakeOptimizations().

◆ ApplyBackendOptimizations()

OptimizationResult armnn::ApplyBackendOptimizations	(	OptimizedNetworkImpl *	optNetObjPtr,
		BackendSettings &	backendSettings,
		BackendsMap &	backends,
		const ModelOptions &	modelOptions,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 1328 of file Network.cpp.

 {
     // Runs backend-specific optimizations on the graph owned by optNetObjPtr. For every selected backend the
     // layers assigned to it are grouped into sub-graphs, each sub-graph is handed to the backend, and the
     // substitutions, deletions and failures it reports are applied to the graph.
     // The returned OptimizationResult has m_Error set only when re-assigning backends for a failed
     // sub-graph reports an error; in that case the function returns immediately.
     ARMNN_ASSERT(optNetObjPtr);
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
     OptimizationResult result;
  
     // Get the optimized graph
     Graph& optGraph = optNetObjPtr->GetGraph();
  
     // Run backend specific optimizations
     for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
     {
         // NOTE(review): the iterator returned by find() is dereferenced without comparing it against
         // backends.end(); the assert below only checks the stored pointer — confirm every selected
         // backend is guaranteed to be present in `backends`.
         auto backendObjPtr = backends.find(selectedBackend)->second.get();
         ARMNN_ASSERT(backendObjPtr);
  
         // These two whole-graph passes are run only for the GpuAcc and CpuAcc backends.
         if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
         {
             Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights()));
             Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
         }
  
         // Select sub-graphs based on backend
         SubgraphViewSelector::Subgraphs subgraphs =
                 SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                       // Select layers assigned to the requested backend
                                                       [&backendObjPtr](const Layer& layer)
                                                       {
  
                                                           // Input and Output layers are never selected.
                                                           return layer.GetType() != LayerType::Input &&
                                                                  layer.GetType() != LayerType::Output &&
                                                                  layer.GetBackendId() == backendObjPtr->GetId();
                                                       });
         if (subgraphs.empty())
         {
             // No sub-graphs found, try with next selected backend
             continue;
         }
  
         // Try to optimize each sub-graph
         for (auto& subgraph : subgraphs)
         {
             // Try to optimize the current sub-graph
             ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
             OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
             ARMNN_ASSERT(optimizationViews.Validate(*subgraph));
  
             // Optimization attempted, check the resulting optimized sub-graph
             for (auto& substitution : optimizationViews.GetSubstitutions())
             {
                 // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
                 SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                 SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                 optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
  
                 // Assign the current backend to the optimized sub-graph
                 const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
                 std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
                     {
                         ARMNN_ASSERT(l);
                         PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
                     });
             }
  
             // Remove deleted sub-graphs
             for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
             {
                 for (auto& l : deletedSubgraph.GetIConnectableLayers())
                 {
                     Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
                     // Walk the input slots from last to first. Each one is disconnected from its parent
                     // output slot, then every connection of the output slot with the same index is moved
                     // onto that parent, so the deleted layer is bypassed before it is erased.
                     // NOTE(review): output slots are indexed with the input-slot index (in - 1), which
                     // presumably relies on the layer having at least as many output as input slots — confirm.
                     for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
                     {
                         // NOTE(review): `auto` deduces a non-reference type; if GetInputSlot() returns a
                         // reference this works on a copy of the slot — confirm that is intended.
                         auto inputSlot = deletedLayer->GetInputSlot(in -1);
                         OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
                         parentOut->Disconnect(inputSlot);
                         // Iterate backwards because each pass disconnects the connection it just read.
                         for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out)
                         {
                             InputSlot* childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1);
                             deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn);
                             parentOut->Connect(*childIn);
                         }
                     }
                     optGraph.EraseLayer(deletedLayer);
                 }
             }
  
             if (!optimizationViews.GetFailedSubgraphs().empty())
             {
                 std::stringstream warningMsg;
                 warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
                 ReportWarning(warningMsg.str(), errMessages);
  
                 // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
                 // The settings are copied so that the ignore-list change below stays local to this re-assignment.
                 BackendSettings settingsCopy(backendSettings);
                 if (!backendObjPtr->GetId().IsCpuRef())
                 {
                     // Add the current backend to the list of backends to ignore
                     settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                 }
  
                 // Running index of the failed sub-graph, used only in the warning text.
                 int count=0;
                 for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                 {
                     // An error occurred: the optimization was attempted but not performed, try different backends
                     std::stringstream subgraphMsg;
                     subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
                                 << " layers inside sub-graph " << count++;
                     ReportWarning(subgraphMsg.str(), errMessages);
  
                     // NOTE(review): the re-assignment is run on `*subgraph` (the whole sub-graph handed to
                     // the backend), not on `failedSubgraph`; the loop variable is only used for the message
                     // above — confirm this is intended.
                     OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                            settingsCopy,
                                                                            *subgraph,
                                                                            errMessages);
                     if (reassignmentResult.m_Error)
                     {
                         // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                         result.m_Error = true;
                         return result;
                     }
                 }
             }
         }
     }
  
     return result;
 }

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), OutputSlot::Connect(), CpuAcc, OutputSlot::Disconnect(), Graph::EraseLayer(), Layer::GetBackendId(), OutputSlot::GetConnection(), OptimizationViews::GetDeletedSubgraphs(), OptimizationViews::GetFailedSubgraphs(), OptimizedNetworkImpl::GetGraph(), SubgraphView::GetIConnectableLayers(), Layer::GetInputSlot(), OutputSlot::GetNumConnections(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OptimizationViews::GetSubstitutions(), Layer::GetType(), GpuAcc, Input, OptimizationResult::m_Error, BackendSettings::m_IgnoredBackends, BackendSettings::m_SelectedBackends, MakeOptimizations(), Output, Optimizer::Pass(), ReportWarning(), SubgraphViewSelector::SelectSubgraphs(), Graph::SubstituteSubgraph(), Undefined, and OptimizationViews::Validate().

Referenced by Optimize().

◆ ArgMinMax() [1/3]

template void armnn::ArgMinMax	(	Decoder< float > &	in,
		int32_t *	out,
		const TensorInfo &	inputTensorInfo,
		const TensorInfo &	outputTensorInfo,
		ArgMinMaxFunction	function,
		int	axis
	)

◆ ArgMinMax() [2/3]

template void armnn::ArgMinMax	(	Decoder< float > &	in,
		int64_t *	out,
		const TensorInfo &	inputTensorInfo,
		const TensorInfo &	outputTensorInfo,
		ArgMinMaxFunction	function,
		int	axis
	)

◆ ArgMinMax() [3/3]

void ArgMinMax	(	Decoder< float > &	in,
		OUT *	out,
		const TensorInfo &	inputTensorInfo,
		const TensorInfo &	outputTensorInfo,
		ArgMinMaxFunction	function,
		int	axis
	)

Definition at line 17 of file ArgMinMax.cpp.

 {
     IgnoreUnused(outputTensorInfo);
  
     unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
  
     const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
     const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
     const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
                                                                          uAxis + 1,
                                                                          inputTensorInfo.GetNumDimensions());
  
     for (unsigned int outer = 0; outer < outerElements; ++outer) {
         for (unsigned int inner = 0; inner < innerElements; ++inner) {
             in[outer * axisSize * innerElements + inner];
             auto tmpValue = in.Get();
             unsigned int tmpIndex = 0;
             for (unsigned int i = 1; i < axisSize; ++i) {
                 in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
                 const auto& value = in.Get();
                 if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
                     (function == armnn::ArgMinMaxFunction::Max &&  value > tmpValue)) {
                     tmpValue = value;
                     tmpIndex = i;
                 }
             }
  
             out[outer * innerElements + inner] = armnn::numeric_cast<OUT>(tmpIndex);
         }
     }
 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), armnnUtils::GetUnsignedAxis(), IgnoreUnused(), Max, and Min.

◆ AssertNumberOfInputSlots()

void armnn::AssertNumberOfInputSlots ( Layer & layer )

Definition at line 28 of file Layer.cpp.

 {
     switch (layer.GetType())
     {
         case LayerType::Convolution2d:
         case LayerType::DepthwiseConvolution2d:
         case LayerType::FullyConnected:
         {
             ARMNN_ASSERT(layer.GetNumInputSlots() == 2 ||
                          layer.GetNumInputSlots() == 3);
             break;
         }
         default:
         {
             ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
             break;
         }
     }
 }

References ARMNN_ASSERT, Convolution2d, DepthwiseConvolution2d, FullyConnected, Layer::GetNumInputSlots(), and Layer::GetType().

Referenced by InputSlot::Insert().

◆ AssignBackends() [1/3]

OptimizationResult AssignBackends	(	OptimizedNetworkImpl *	optNetObjPtr,
		BackendSettings &	backendSettings,
		Graph::Iterator &	firstLayer,
		Graph::Iterator &	lastLayer,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 1186 of file Network.cpp.

 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
     OptimizationResult result;
  
     auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
     if (availablePreferredBackends.empty())
     {
         std::stringstream failureMsg;
         failureMsg << "No preferred backends are available";
         ReportError(failureMsg.str(), errMessages);
  
         result.m_Error = true;
         return result;
     }
  
     for (auto it = firstLayer; it != lastLayer; ++it)
     {
         auto layer = PolymorphicDowncast<Layer*>(*it);
         std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
  
         // In AttemptBackendAssignment() we check:
         //     - if input/output datatypes of the layer are float16
         //     - if the layer is supported with these datatypes
         // If the layer is not supported (failing on ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED() in clframework),
         // we attempt to insert convertion layers either side of the new fp32 layer.
         bool isFloat16 = false;
         for (auto type : inOutDataType)
         {
             if (type == DataType::Float16)
             {
                 isFloat16 = true;
                 break;
             }
         }
  
         if (layer->GetBackendId() == "Unknown" || isFloat16)
         {
             AssignBackendsIConnectable(optNetObjPtr,
                                        *it,
                                        errMessages,
                                        result,
                                        backendSettings,
                                        availablePreferredBackends);
         }
     }
  
     for (auto it = firstLayer; it != lastLayer; ++it)
     {
         auto layer = PolymorphicDowncast<Layer*>(*it);
  
         if(layer->GetType() == LayerType::Input)
         {
             BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
             layer->SetBackendId(connectedBackendId);
         }
     }
  
     return result;
 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), Float16, BackendSettings::GetAvailablePreferredBackends(), GetLayerInOutDatatype(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

Referenced by ApplyBackendOptimizations(), AssignBackends(), and Optimize().

◆ AssignBackends() [2/3]

OptimizationResult armnn::AssignBackends	(	OptimizedNetworkImpl *	optNetObjPtr,
		BackendSettings &	backendSettings,
		SubgraphView &	subgraph,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 1295 of file Network.cpp.

 {
     // Convenience overload: assigns backends to every layer of `subgraph` by delegating to the iterator
     // overload. The begin/end iterators are stored in named locals because that overload takes them by
     // non-const reference.
     SubgraphView::IConnectableLayerIterator firstLayer = subgraph.begin();
     SubgraphView::IConnectableLayerIterator lastLayer  = subgraph.end();
     return AssignBackends(optNetObjPtr,
                           backendSettings,
                           firstLayer,
                           lastLayer,
                           errMessages);
 }

References AssignBackends(), SubgraphView::begin(), and SubgraphView::end().

◆ AssignBackends() [3/3]

OptimizationResult AssignBackends	(	OptimizedNetworkImpl *	optNetObjPtr,
		BackendSettings &	backendSettings,
		SubgraphView::IConnectableLayerIterator &	firstLayer,
		SubgraphView::IConnectableLayerIterator &	lastLayer,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 1251 of file Network.cpp.

 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
     OptimizationResult result;
  
     auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
     if (availablePreferredBackends.empty())
     {
         std::stringstream failureMsg;
         failureMsg << "No preferred backends are available";
         ReportError(failureMsg.str(), errMessages);
  
         result.m_Error = true;
         return result;
     }
  
     for (auto it = firstLayer; it != lastLayer; ++it)
     {
         AssignBackendsIConnectable(optNetObjPtr,
                                    *it,
                                    errMessages,
                                    result,
                                    backendSettings,
                                    availablePreferredBackends);
     }
  
     for (auto it = firstLayer; it != lastLayer; ++it)
     {
         auto layer = PolymorphicDowncast<Layer*>(*it);
  
         if(layer->GetType() == LayerType::Input)
         {
             BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
             layer->SetBackendId(connectedBackendId);
         }
     }
  
     return result;
 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

◆ AssignBackendsIConnectable()

void armnn::AssignBackendsIConnectable	(	OptimizedNetworkImpl *	optNetObjPtr,
		IConnectableLayer *	it,
		Optional< std::vector< std::string > & >	errMessages,
		OptimizationResult &	result,
		BackendSettings &	backendSettings,
		std::vector< BackendId > &	availablePreferredBackends
	)

Definition at line 1076 of file Network.cpp.

 {
     // Tries to assign a backend to the single layer `it`: first the layer's backend hint (if any), then
     // each available preferred backend in order. Every backend that accepts the layer is recorded in
     // backendSettings.m_SelectedBackends. Errors are reported through `result`; Input layers are skipped.
     auto ReturnError = [&](const Layer* layer)
     {
         return ReturnWithError(result, layer, backendSettings, errMessages);
     };
  
     auto layer = PolymorphicDowncast<Layer*>(it);
  
     if (layer->GetType() == LayerType::Input)
     {
         return;
     }
  
     // NOTE(review): elements [0] and [1] are read below, so this presumably always holds at least two
     // entries (input type, output type) — confirm against GetLayerInOutDatatype().
     std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
  
     std::string reasonIfUnsupported;
     bool found = false;
     if (!CheckScaleSetOnQuantizedType(layer, errMessages))
     {
         // don't bomb immediately, find all the quantized outputs
         // which haven't had a scale set and report them all back.
         result.m_Error = true;
     }
  
     // First try assign layer to hint backend
     if (layer->GetBackendHint().has_value() &&
         backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
         AttemptBackendAssignment(backendSettings,
                                  optNetObjPtr->GetGraph(),
                                  layer,
                                  layer->GetBackendHint().value(),
                                  inOutDataType[0],
                                  inOutDataType[1],
                                  availablePreferredBackends,
                                  reasonIfUnsupported,
                                  errMessages).IsOk())
     {
         found = true;
         backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
     }
     else
     {
         // Try assign layer to preferred list of backends
         for (const auto& backend : availablePreferredBackends)
         {
             if (layer->GetBackendHint().has_value() &&
                 layer->GetBackendHint().value() == backend)
             {
                 continue; //Don't re-test the backend hint
             }
  
             OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                               optNetObjPtr->GetGraph(),
                                                               layer,
                                                               backend,
                                                               inOutDataType[0],
                                                               inOutDataType[1],
                                                               availablePreferredBackends,
                                                               reasonIfUnsupported,
                                                               errMessages);
  
             if (res.IsOk())
             {
                 found = true;
                 backendSettings.m_SelectedBackends.insert(backend);
                 break;
             }
             else if (res.IsError())
             {
                 // NOTE(review): despite the wording below there is no break here, so the loop goes on
                 // to try the next backend with the error already stored in `result` — confirm intended.
                 result = res;  // Cannot continue.
                 // Note: we don't need to log the error as it would already
                 // be logged in AttemptBackendAssignment().
             }
             else
             {
                 ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
             }
         }
     }
  
     // If the layer is unsupported by any devices, log and return a null network.
     if (!found)
     {
         // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
         //       fallback we should set the compute device on the layer to CpuRef (these are not
         //       available as accelerated operations, or are only available under certain
         //       conditions, currently they comprise MemCopy, Constant, Permute)
         armnn::LayerType layerType = layer->GetType();
         if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                 layerType == armnn::LayerType::Constant ||
                                                 layerType == armnn::LayerType::Permute))
         {
             BackendId cpuBackendId(armnn::Compute::CpuRef);
             layer->SetBackendId(cpuBackendId);
             backendSettings.m_SelectedBackends.insert(cpuBackendId);
         }
         else
         {
             // No backend accepted the layer and the CpuRef fallback does not apply: record the failure.
             result = ReturnError(layer);
         }
     }
  
 }

References ARMNN_ASSERT_MSG, AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Constant, CpuRef, OptimizedNetworkImpl::GetGraph(), GetLayerInOutDatatype(), Input, BackendSettings::IsBackendSupported(), BackendSettings::IsCpuRefUsed(), OptimizationResult::IsError(), OptimizationResult::IsOk(), OptimizationResult::IsWarningOnly(), OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MemCopy, Permute, and ReturnWithError().

Referenced by AssignBackends().

◆ AssignSplitId()

void armnn::AssignSplitId	(	LayerSelectionInfo::LayerInfoContainer &	layerInfos,
		LayerSelectionInfo &	layerInfo
	)

Definition at line 309 of file SubgraphViewSelector.cpp.

 {
     // Chooses the PartialSubgraph for layerInfo (stored in layerInfo.m_Subgraph) in three steps:
     //   1. try to join / merge the subgraphs of its inputs,
     //   2. create a fresh subgraph if none could be joined,
     //   3. record the input subgraphs that were not merged as direct antecedents.

     // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned.
     ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
     {
         // We can only attach ourselves to the subgraph from this input if there isn't a cut here.
         // (A "cut" here means the two layers differ in m_IsSelected.)
         if (layerInfo.m_IsSelected == parentInfo.m_IsSelected)
         {
             // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs.
             // This will be the case if the subgraph that we will become part of is already a dependency
             // of one of the subgraphs that are input to this layer, e.g:
             //
             //    0     |  The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X.
             //   / \    |
             //  1   0   |  We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0.
             //   \ /    |  We can however merge X into subgraph 1.
             //    X     |
             //
             bool dependenciesOk = true;
             // Inner pass over *all* inputs of this layer (including parentInfo itself). There is no
             // early exit: every input is visited even after dependenciesOk has become false.
             ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo)
             {
                 // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer.
                 // Hence it is important that this is efficient - see PartialSubgraph class description.
                 if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get()))
                 {
                     dependenciesOk = false;
                 }
             });
  
             if (dependenciesOk)
             {
                 // Merge into the subgraph of this input. If we have already been merged into another subgraph
                 // (from another input of this layer), then merge both of them together.
                 if (layerInfo.m_Subgraph == nullptr)
                 {
                     // First acceptable input: share ownership of its subgraph.
                     layerInfo.m_Subgraph = parentInfo.m_Subgraph;
                 }
                 else
                 {
                     // We call MergeWith() ~ n times, where n is the number of inputs to this layer.
                     // Therefore it does not need to be as performant as HasAntecedent().
                     layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get());
                 }
             }
         }
     });
  
     // If we weren't able to merge into an existing subgraph then we need to make a new one
     if (layerInfo.m_Subgraph == nullptr)
     {
         layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>();
     }
  
     // Record dependencies of the chosen subgraph based on the inputs of this layer.
     ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
     {
         // These functions are called ~n times, where n is the number of inputs to this layer.
         // Therefore it does not need to be as performant as HasAntecedent().
         // Inputs whose subgraph was merged with ours are skipped; only the remaining ones
         // are added as antecedents.
         if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get()))
         {
             layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get());
         }
     });
 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ AttemptBackendAssignment()

OptimizationResult armnn::AttemptBackendAssignment	(	BackendSettings &	backendSettings,
		Graph &	graph,
		Layer *	layer,
		BackendId	backend,
		DataType	dataTypeIn,
		DataType	dataTypeOut,
		const std::vector< BackendId > &	availablePreferredBackends,
		std::string &	reasonIfUnsupported,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 847 of file Network.cpp.

 {
     // Tries to run `layer` on `backend`. Outcomes visible in this body:
     //   - the layer is supported as-is: the default-constructed `result` is returned;
     //   - the layer is not supported (or the FP16 message below was reported), an input/output is
     //     Float16 and the layer is supported in Float32: Float16 constants feeding the layer are
     //     rewritten, conversion layers are inserted around it and each is given a backend, then
     //     `result` is returned (or ReturnError() if a conversion layer has no supporting backend);
     //   - otherwise a warning is reported and OptimizationResult(true, false) is returned.
     // Side effects: the layer's backend id is always set to `backend`, and `reasonIfUnsupported`
     // is appended to.
     OptimizationResult result;
  
     // Helper lambda to compose meaningful error message before returning with error
     // NOTE: the lambda parameter shadows the outer `layer`; it reports whichever layer it is given.
     auto ReturnError = [&](const Layer* layer)
         {
             return ReturnWithError(result, layer, backendSettings, errMessages);
         };
  
     // need to set the compute device on the layer
     // before we can check if it is supported
     layer->SetBackendId(backend);
     std::string currentReasonIfUnsupported;
  
     // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture 
     // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
     // to be FP32 and inserting convert layers around the FP32 operator.
     bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
     reasonIfUnsupported += currentReasonIfUnsupported;
     // This string matches the error message that is produced by acl when attempting to run FP16 kernels on
     // a cpu or build that does not have fp16 support. We use this to check if we should add
     // conversion layers or not.
     std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
     // The fallback path is also entered when the layer is reported as supported but the reason
     // string from this call contains the FP16 message above.
     if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
     {
         if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
         {
             // Re-check support with the data type forced to Float32. The conversion layers
             // themselves are excluded from this fallback.
             if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                 && layer->GetType() != LayerType::ConvertFp32ToFp16
                 && layer->GetType() != LayerType::ConvertFp16ToFp32)
             {
                 // Rewrites a Float16 ConstantLayer in place as Float32: converts the stored values,
                 // replaces the layer's tensor handle and updates the TensorInfo of output slot 0.
                 // Layers of any other type, and constants of any other data type, are left untouched.
                 auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
                 {
                     if (layer.GetType() == LayerType::Constant)
                     {
                         ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
  
                         auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
  
                         if (info.GetDataType() == DataType::Float16)
                         {
                             std::vector<float> newValues(info.GetNumElements());
  
                             armnnUtils::FloatingPointConverter::ConvertFloat16To32(
                                     constantLayer->m_LayerOutput->GetConstTensor<Half>(),
                                     info.GetNumElements(),
                                     newValues.data());
  
                             // newInfo is a copy taken before the handle is replaced, because `info`
                             // refers to the TensorInfo of the handle that reset() below releases.
                             TensorInfo newInfo(info);
                             newInfo.SetDataType(DataType::Float32);
                             ConstTensor newInput(newInfo, newValues);
                             constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
  
                             layer.GetOutputSlot(0).SetTensorInfo(newInfo);
                         }
                     }
                 };
  
                 // Set when at least one constant input with a single connection was found; it is
                 // forwarded to InsertConvertFp16ToFp32LayersBefore() below.
                 bool checkType = false;
  
                 // NOTE(review): `auto inputSlot` iterates by value, so each InputSlot is copied, and
                 // connectedOutputSlot is dereferenced without a null check - this assumes every
                 // input slot of the layer is connected. Confirm both are intended.
                 for (auto inputSlot : layer->GetInputSlots())
                 {
                     auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
                     if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
                     {
                         // Only constants whose output slot has exactly one connection are rewritten.
                         if (connectedOutputSlot->GetNumConnections() == 1)
                         {
                             checkType = true;
                             ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
                         }
                     }
                 }
  
                 // Insert FP16 -> FP32 conversion layer before current layer
                 std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                 if (dataTypeIn == DataType::Float16)
                 {
                     convertFp16ToFp32Layers =
                             InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
                 }
  
                 // Insert FP32 -> FP16 conversion layer after current layer
                 std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                 if (dataTypeOut == DataType::Float16)
                 {
                     convertFp32ToFp16Layers =
                         InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                 }
  
                 // Assign a supported backend to the newly introduced conversion layers
                 // Tries preferredBackend first, then every other entry of availablePreferredBackends
                 // in order. Returns true as soon as one supports the layer. On failure the layer is
                 // left with the last backend id that was tried.
                 // NOTE: `layer`, `reasonIfUnsupported` and the loop variable `backend` shadow the
                 // outer names; the reason text collected in here is local and discarded.
                 auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                     {
                         bool supportedBackendFound = false;
                         std::string reasonIfUnsupported;
  
                         // Try preferred backend first
                         layer->SetBackendId(preferredBackend);
                         if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                EmptyOptional(),
                                                                reasonIfUnsupported))
                         {
                             supportedBackendFound = true;
                         }
                         else
                         {
                             for (const auto& backend : availablePreferredBackends)
                             {
                                 // Skip preferred backend (we already determined that it is not supported)
                                 if (backend == preferredBackend)
                                 {
                                     continue;
                                 }
  
                                 layer->SetBackendId(backend);
                                 if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                        EmptyOptional(),
                                                                        reasonIfUnsupported))
                                 {
                                     supportedBackendFound = true;
                                     break;
                                 }
                             }
                         }
  
                         return supportedBackendFound;
                     };
  
                 // A conversion layer that no backend supports aborts the assignment with an error.
                 for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                 {
                     if (!AssignFirstSupportedBackend(convertLayer, backend))
                     {
                         return ReturnError(convertLayer);
                     }
                 }
  
                 for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                 {
                     if (!AssignFirstSupportedBackend(convertLayer, backend))
                     {
                         return ReturnError(convertLayer);
                     }
                 }
  
                 // FP32 fallback succeeded: `result` is still in its default-constructed state.
                 return result;
             }
         }
  
         // No fallback was possible on this backend: report a warning that includes the accumulated
         // reason text.
         std::stringstream warningMsg;
         warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                    << " is not supported on requested backend " << layer->GetBackendId().Get()
                    << " for input data type " << GetDataTypeName(dataTypeIn)
                    << " and output data type " << GetDataTypeName(dataTypeOut)
                    << " (reason: " << reasonIfUnsupported
                    << "), falling back to the next backend.";
         ReportWarning(warningMsg.str(), errMessages);
  
         // NOTE(review): presumably (warning = true, error = false), i.e. a warning-only result that
         // lets the caller try another backend - confirm against OptimizationResult's constructor.
         return OptimizationResult(true, false);
     }
     else
     {
         // Supported as-is on the requested backend.
         return result;
     }
 }

References Constant, FloatingPointConverter::ConvertFloat16To32(), ConvertFp16ToFp32, ConvertFp32ToFp16, Float16, Float32, BackendId::Get(), Layer::GetBackendId(), GetDataTypeName(), Layer::GetInputSlots(), GetLayerTypeAsCString(), Layer::GetOutputSlot(), Layer::GetType(), info, InsertConvertFp16ToFp32LayersBefore(), InsertConvertFp32ToFp16LayersAfter(), IWorkloadFactory::IsLayerSupported(), ConstantLayer::m_LayerOutput, ReportWarning(), ReturnWithError(), Layer::SetBackendId(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AssignBackendsIConnectable().

◆ BackendRegistryInstance()

BackendRegistry & BackendRegistryInstance ( )

Definition at line 15 of file BackendRegistry.cpp.

 {
     static BackendRegistry instance;
     return instance;
 }

Referenced by CreateBackendObject(), CreateSupportedBackends(), DynamicBackendUtils::DeregisterDynamicBackends(), GetCapability(), GetILayerSupportByBackendId(), GetNumberOfCacheFiles(), HasMatchingCapability(), ArmNNProfilingServiceInitialiser::InitialiseProfilingService(), DynamicBackendUtils::RegisterDynamicBackends(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().

◆ BatchNormImpl()

void BatchNormImpl	(	const BatchNormalizationQueueDescriptor &	data,
		Decoder< float > &	meanDecoder,
		Decoder< float > &	varianceDecoder,
		Decoder< float > &	betaDecoder,
		Decoder< float > &	gammaDecoder,
		Decoder< float > &	inputDecoder,
		Encoder< float > &	outputEncoder
	)

Definition at line 18 of file BatchNormImpl.cpp.

 {
     const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
     const TensorShape inputShape = inputInfo.GetShape();
  
     armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
  
     unsigned int inputBatches  = inputShape[0];
     unsigned int inputHeight   = inputShape[dataLayout.GetHeightIndex()];
     unsigned int inputWidth    = inputShape[dataLayout.GetWidthIndex()];
     unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
  
     for (unsigned int c = 0; c < inputChannels; c++)
     {
         meanDecoder[c];
         varianceDecoder[c];
         betaDecoder[c];
         gammaDecoder[c];
         float mean  = meanDecoder.Get();
         float var   = varianceDecoder.Get();
         float beta  = betaDecoder.Get();
         float gamma = gammaDecoder.Get();
  
         float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
         float add  = beta - mult * mean;
  
         for (unsigned int n = 0; n < inputBatches; n++)
         {
             for (unsigned int h = 0; h < inputHeight; h++)
             {
                 for (unsigned int w = 0; w < inputWidth; w++)
                 {
                     unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
                     inputDecoder[index];
                     outputEncoder[index];
                     outputEncoder.Set(mult * inputDecoder.Get() + add);
                 }
             }
         }
     }
 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), GetTensorInfo(), DataLayoutIndexed::GetWidthIndex(), BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QueueDescriptor::m_Inputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

◆ BatchToSpaceNd()

void BatchToSpaceNd	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const BatchToSpaceNdDescriptor &	params,
		Decoder< float > &	inputData,
		Encoder< float > &	outputData
	)

Definition at line 50 of file BatchToSpaceNd.cpp.

 {
     unsigned int rank = inputInfo.GetNumDimensions();
     if (rank != 3 && rank != 4 )
     {
         throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
                                        CHECK_LOCATION());
     }
  
     DataLayoutIndexed dataLayout = params.m_DataLayout;
     unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
  
     TensorShape inputShape = inputInfo.GetShape();
     TensorShape outputShape = outputInfo.GetShape();
  
     const unsigned int inputBatchSize  = inputShape[0];
     const unsigned int outputBatchSize = outputShape[0];
  
     const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
                                               : inputShape[dataLayout.GetChannelsIndex()];
  
     const unsigned int inputHeight  = inputShape[dataLayout.GetHeightIndex()];
     const unsigned int inputWidth   = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
     const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
     const unsigned int outputWidth  = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
  
     const unsigned int blockHeight = params.m_BlockShape[0];
     const unsigned int blockWidth  = (rank == 3) ? 1 : params.m_BlockShape[1];
  
     const unsigned int cropsTop  = params.m_Crops[0].first;
     const unsigned int cropsLeft = (rank == 3) ? 0 : params.m_Crops[1].first;
  
     for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
     {
         const unsigned int outBatch = inBatch % outputBatchSize;
         const unsigned int spatialOffset = inBatch / outputBatchSize;
  
         for (unsigned int inH = 0; inH < inputHeight; ++inH)
         {
             const unsigned int outH = inH * blockHeight + spatialOffset / blockWidth - cropsTop;
  
             if (outH >= outputHeight)
             {
                 continue;
             }
  
             for (unsigned int inW = 0; inW < inputWidth; ++inW)
             {
                 const unsigned int outW = inW * blockWidth + spatialOffset % blockWidth - cropsLeft;
  
                 if (outW >= outputWidth)
                 {
                     continue;
                 }
  
                 for (unsigned int c = 0; c < channels; c++)
                 {
                     unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
                     unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
  
                     outputData[outOffset];
                     inputData[inOffset];
                     outputData.Set(inputData.Get());
                 }
             }
         }
     }
 }

References BatchToSpaceNd(), CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), BatchToSpaceNdDescriptor::m_BlockShape, BatchToSpaceNdDescriptor::m_Crops, BatchToSpaceNdDescriptor::m_DataLayout, Offset(), and Encoder< IType >::Set().

Referenced by BatchToSpaceNd(), and BatchToSpaceNdLayer::BatchToSpaceNdLayer().

◆ BuildAddMulAddSlotLists()

void armnn::BuildAddMulAddSlotLists	(	bool	handleReLu,
		bool	multipleOutputs,
		std::vector< SlotListType > &	inputLayersSlotLists,
		std::vector< SlotListType > &	outputLayersSlotLists
	)

Definition at line 36 of file NeonBackendOptimizationUtils.hpp.

 {
     // Build input slot list
     inputLayersSlotLists.push_back({0, 1});     // Add
     inputLayersSlotLists.push_back({1});        // Mul
     inputLayersSlotLists.push_back({1});        // Add
     if (handleReLu)
     {
         inputLayersSlotLists.push_back({});     // Relu
     }
  
     // Build output slot list
     if (multipleOutputs)
     {
         outputLayersSlotLists.push_back({0});   // Add
     }
     else
     {
         outputLayersSlotLists.push_back({});    // Add
     }
     outputLayersSlotLists.push_back({});        // Mul
     if (handleReLu)
     {
         outputLayersSlotLists.push_back({});    // Add
         outputLayersSlotLists.push_back({0});   // Relu
     }
     else
     {
         outputLayersSlotLists.push_back({0});   // Add
     }
 }

◆ BuildAddMulAddTensorInfoLists()

bool armnn::BuildAddMulAddTensorInfoLists	(	Type *	layerList[4],
		unsigned int &	numInputs,
		unsigned int &	numOutputs,
		std::vector< TensorInfo > &	inputInfos,
		std::vector< TensorInfo > &	outputInfos,
		const ActivationDescriptor *&	activationDescriptor,
		bool &	fuseReLu
	)

Definition at line 87 of file NeonBackendOptimizationUtils.hpp.

 {
     // Collects the external input/output TensorInfos of an Add -> Mul -> Add (-> ReLu) layer sequence.
     //   layerList[0..2] must be non-null Add, Mul, Add layers (otherwise the INVALIDARG macro fires);
     //   layerList[3] is optional and, when present, must be a ReLu / BoundedReLu activation.
     // Outputs: numInputs / numOutputs are reset and counted here; inputInfos / outputInfos are
     // appended to (never cleared); activationDescriptor is only written when layerList[3] is set;
     // fuseReLu reports whether layerList[3] is set.
     // Returns false when the sequence does not have 6 connected input slots with 4 collected input
     // infos, or when no output info was collected; true otherwise.
     // NOTE: constant layers feeding the sequence may already have been modified (see the
     // collapse step below) by the time false is returned.
     ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
     ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
     ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
  
     ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[0], BinaryOperation::Add));
     ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[1], BinaryOperation::Mul));
     ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[2], BinaryOperation::Add));
  
     fuseReLu = (layerList[3] != nullptr);
     if (fuseReLu)
     {
         activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
         ARMNN_THROW_INVALIDARG_IF_FALSE((activationDescriptor->m_Function == ActivationFunction::ReLu) ||
                      (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
     }
  
     numInputs = 0;
     numOutputs = 0;
  
     // Ensure that there are 6 input slots in the add/mul/add layers
     // we are going to replace
     unsigned int layerIdx = 0;
     unsigned int inputSlotCount = 0;
     for (layerIdx = 0; layerIdx < 3; ++layerIdx)
     {
         for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
         {
             InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
             OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
             // Unconnected input slots are neither counted nor recorded.
             if (outputSlot)
             {
                 if (layerIdx == 0)
                 {
                     // Always count the input connections of the first add
                     inputInfos.push_back(inputSlot->GetTensorInfo());
                     numInputs++;
                 }
                 else
                 {
                     // For subsequent layers, we skip connections to the previous layers in the counting
                     if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
                     {
                         TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
                         // Only the inputs collected third and fourth are candidates for collapsing.
                         if (numInputs == 2 || numInputs == 3)
                         {
                             // Workaround the broadcast optimization to collapse shapes such as
                             // [1, 1, 1, 2] to [2] as required by backend
                             if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
                             {
                                 // Same call as for outputSlot above, fetched again.
                                 OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
                                 if (previousLayerSlot)
                                 {
                                     if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
                                     {
                                         // First update the TensorInfo in the constant owning layer
                                         previousLayerSlot->SetTensorInfo(inputSlotInfo);
                                         // Then update the TensorInfo in the workload for the owning layer
                                         ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
                                                 &previousLayerSlot->GetOwningLayer());
                                         // The new handle is built from the old handle's data with the
                                         // collapsed TensorInfo, then replaces the old handle.
                                         layer->m_LayerOutput
                                                 = std::make_unique<ScopedTensorHandle>(
                                                 ConstTensor(inputSlotInfo,
                                                             layer->m_LayerOutput.get()->GetConstTensor<void>()));
                                     }
                                 }
                             }
                         }
                         // The (possibly collapsed) info is recorded whether or not the producer
                         // was a constant layer.
                         inputInfos.push_back(inputSlotInfo);
                         numInputs++;
                     }
                 }
                 // Counts every connected input slot, internal connections included.
                 inputSlotCount++;
             }
         }
     }
  
     // Check the input counts
     // NOTE: the size() == 4 test is on the whole vector, so it assumes inputInfos was empty on entry.
     bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
     if (! validInputCount)
     {
         return false;
     }
  
     // The activation layer (index 3) is only visited when it is present.
     const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
     for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
     {
         for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
         {
             OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
  
             for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
             {
                 InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
                 if (layerIdx < (maxIdx-1))
                 {
                     // Not the last layer: record only connections that leave the sequence,
                     // i.e. that do not feed the next layer in layerList.
                     if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
                     {
                         outputInfos.push_back(outputSlot->GetTensorInfo());
                         numOutputs++;
                     }
                 }
                 // Last layer: every connection is an output. (layerList[layerIdx] has already been
                 // dereferenced by the loop header above, so this null check cannot fail here.)
                 else if (layerList[layerIdx] != nullptr)
                 {
                     outputInfos.push_back(outputSlot->GetTensorInfo());
                     numOutputs++;
                 }
             }
         }
     }
  
     // Check the output count
     bool validOutputCount = (outputInfos.size() > 0);
     if (! validOutputCount)
     {
         return false;
     }
  
     return true;
 }

References Add, ARMNN_THROW_INVALIDARG_IF_FALSE, BoundedReLu, CollapseLeadingUnitDimensions(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), OutputSlot::GetNumConnections(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), OutputSlot::GetTensorInfo(), Layer::GetType(), IsSequenceLayerType(), ActivationDescriptor::m_Function, ConstantLayer::m_LayerOutput, Mul, ReLu, and OutputSlot::SetTensorInfo().

◆ CalcLevel()

int armnn::CalcLevel ( const Event * eventPtr )

Definition at line 246 of file Profiling.cpp.

 {
     int level = 0;
     while (eventPtr != nullptr)
     {
         eventPtr = eventPtr->GetParentEvent();
         level++;
     }
     return level;
 }

References Event::GetParentEvent().

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults(), and ProfilerImpl::PopulateParent().

◆ CalculateEdgeStrategy()

EdgeStrategy armnn::CalculateEdgeStrategy	(	BackendsMap &	backends,
		ITensorHandleFactory::FactoryId	srcFactoryId,
		const Layer &	layer,
		const Layer &	connectedLayer,
		TensorHandleFactoryRegistry &	registry,
		bool	importEnabled
	)

Definition at line 1723 of file Network.cpp.

 {
     auto toBackend = backends.find(connectedLayer.GetBackendId());
     ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
  
     auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
  
     // Legacy API check for backward compatibility
     if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
     {
         if (layer.GetBackendId() != connectedLayer.GetBackendId())
         {
             return EdgeStrategy::CopyToTarget;
         }
         else
         {
             return EdgeStrategy::DirectCompatibility;
         }
     }
  
     // TensorHandleFactory API present, so perform more sophisticated strategies.
     // Dst Output layers don't require copy because they use import or map/unmap
     if (connectedLayer.GetType() == LayerType::Output)
     {
         return EdgeStrategy::DirectCompatibility;
     }
  
     // Search for direct match in prefs
     for (auto&& pref : dstPrefs)
     {
         if (pref == srcFactoryId)
         {
             return EdgeStrategy::DirectCompatibility;
         }
     }
  
     // Search for export/import options
     ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
     if (srcFactory->GetExportFlags() != 0 && importEnabled)
     {
         for (auto&& pref : dstPrefs)
         {
             ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
  
             // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
             if (!dstFactory) {
                 continue;
             }
             if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
             {
                 auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
                 auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
                                                                  &connectedLayer,
                                                                  CapabilityClass::PaddingRequired);
                 auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
                 auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
                                                                &connectedLayer,
                                                                CapabilityClass::FallbackImportDisabled);
                 // Do not require memory copy if the source and destination do not require padding.
                 if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
                 {
                     return EdgeStrategy::ExportToTarget;
                 }
             }
         }
     }
  
     // Search for copy options via map/unmap
     if (srcFactory->SupportsMapUnmap())
     {
         for (auto&& pref : dstPrefs)
         {
             ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
             if (dstFactory && dstFactory->SupportsMapUnmap())
             {
                 return EdgeStrategy::CopyToTarget;
             }
         }
     }
  
     return EdgeStrategy::Undefined;
 }

References ARMNN_ASSERT_MSG, CopyToTarget, DirectCompatibility, ExportToTarget, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, PaddingRequired, ITensorHandleFactory::SupportsMapUnmap(), and Undefined.

Referenced by SelectTensorHandleStrategy().

◆ CalculateGatherNdKeyIndices()

std::map< std::string, unsigned int > CalculateGatherNdKeyIndices	(	TensorInfo	inputInfo0,
		TensorInfo	inputInfo1
	)

Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)

Parameters

inputInfo0	- TensorInfo of the corresponding input tensor: params
inputInfo1	- TensorInfo of the corresponding input tensor: indices

Returns: - A map with names and values for N, ND, K, W, C

Definition at line 312 of file WorkloadUtils.cpp.

 {
     std::vector<unsigned int> paramsShape;
     for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
     {
         paramsShape.push_back(inputInfo0.GetShape()[i]);
     }
  
     std::vector<unsigned int> indicesShape;
     for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
     {
         indicesShape.push_back(inputInfo1.GetShape()[i]);
     }
  
     std::map<std::string, unsigned int> keyIndices;
  
     // N: number of batches
     keyIndices["N"] = 1;
  
     // ND: number of dimensions that are sliced from params
     keyIndices["ND"] = indicesShape.back();
  
     // W: number of indices in each batch (all but the last dimension)
     keyIndices["W"] =
         static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
                                                   std::end(indicesShape) - 1,
                                                   1,
                                                   std::multiplies<>() ));
     // K: range of each index
     keyIndices["K"] =
         static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
                                                   std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
                                                   1,
                                                   std::multiplies<>() ));
     //  C: number of channels for each index
     keyIndices["C"] =
         static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
                                                   std::end(paramsShape),
                                                   1,
                                                   std::multiplies<>() ));
  
     return keyIndices;
 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by ClGatherNdWorkload::ClGatherNdWorkload(), ClGatherNdWorkloadValidate(), NeonGatherNdWorkload::NeonGatherNdWorkload(), and NeonGatherNdWorkloadValidate().

◆ CalculateSlotOption()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOption	(	BackendsMap &	backends,
		OutputSlot &	outputSlot,
		TensorHandleFactoryRegistry &	registry,
		bool	exportEnabled
	)

Definition at line 1573 of file Network.cpp.

 {
     // Picks the tensor handle factory for outputSlot: filter the source backend's
     // preferred factories, score each survivor by how many connections would need a
     // copy, and return the lowest-scoring one (ties resolved by preference order).
     // Falls back to LegacyFactoryId when nothing qualifies.

     // First ensure the from backends can support the TensorHandleAPI
     Layer& layer = outputSlot.GetOwningLayer();
     auto frmBackend = backends.find(layer.GetBackendId());
     if (frmBackend == backends.end() ||
         !frmBackend->second->SupportsTensorAllocatorAPI())
     {
         return ITensorHandleFactory::LegacyFactoryId;
     }
   
     // Record whether any consumer of this slot is an Output layer.
     bool outputConnection = false;
     for (auto&& connection : outputSlot.GetConnections())
     {
         const Layer& connectedLayer = connection->GetOwningLayer();
         if (connectedLayer.GetType() == LayerType::Output)
         {
             outputConnection = true;
         }
     }
   
     IBackendInternal* srcBackend = frmBackend->second.get();
     auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
   
     // Initialize the scores
     // Only factories that pass the filters below get an entry; absence from the map
     // means "excluded".
     std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
     for (auto&& pref : srcPrefs)
     {
         if (exportEnabled)
         {
             // NOTE(review): factory is dereferenced below without a null check - confirm
             // that GetFactory() cannot return null for a backend's own preferences.
             ITensorHandleFactory* factory = registry.GetFactory(pref);
             if (outputConnection)
             {
                 // Check if this is fallback case
                 // i.e. at least one input of this layer is produced on a different backend.
                 // NOTE(review): GetConnectedOutputSlot() is dereferenced unchecked;
                 // presumably every input slot is connected at this point - verify.
                 bool fallbackConnection = false;
                 for (auto&& inputSlot : layer.GetInputSlots())
                 {
                         if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
                         {
                             fallbackConnection = true;
                         }
                 }
                 if (fallbackConnection)
                 {
                     auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
                     // Cannot use factory import if fallback import is not supported.
                     if (!factoryCap.empty())
                     {
                         continue;
                     }
                 }
                 else if (factory->GetExportFlags() == 0)
                 {
                     // Feeding an Output layer without fallback: skip factories that report no export flags.
                     continue;
                 }
             }
             if (!outputConnection)
             {
                 auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
                 // Cannot use factory import if fallback import is not supported.
                 if (!factoryCap.empty())
                 {
                     continue;
                 }
             }
   
         }
         else
         {
             // Only consider factories that support map/unmap
             ITensorHandleFactory* factory = registry.GetFactory(pref);
             if (!factory->SupportsMapUnmap())
             {
                 // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                 continue;
             }
         }
   
   
         auto it = factoryScores.find(pref);
         if (it == factoryScores.end())
         {
             // Add new score to the table
             factoryScores[pref] = 0;
         }
     }
   
     // Score each handle factory based on how many times it requires copies on the slot connections
     for (auto&& connection : outputSlot.GetConnections())
     {
         const Layer& connectedLayer = connection->GetOwningLayer();
   
         auto toBackend = backends.find(connectedLayer.GetBackendId());
         ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
   
         auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
         for (auto&& src : srcPrefs)
         {
             if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
             {
                 continue;
             }
   
             for (auto&& dst : dstPrefs)
             {
                 if (RequiresCopy(src, dst, registry))
                 {
                     // A copy is required for this pairing: add one penalty point for this
                     // connection (at most one per connection, hence the break). Lowest score wins.
                     factoryScores[src]++;
                     break;
                 }
             }
         }
     }
   
     // Find the lowest score
     int minScore = std::numeric_limits<int>::max();
     for (auto it : factoryScores)
     {
         minScore = std::min(minScore, it.second);
     }
   
     // Collect factories matching the best(lowest) score
     std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
     for (auto it : factoryScores)
     {
         if (it.second == minScore)
         {
             optimalFactories.push_back(it.first);
         }
     }
   
     // For all compatible Factories matching the best score, find the preferred one for the current layer.
     // srcPrefs is walked in order, so the earliest preference among the tied factories is returned.
     for (auto&& srcPref : srcPrefs)
     {
         for (auto&& comp : optimalFactories)
         {
             if (comp == srcPref)
             {
                 return comp;
             }
         }
     }
   
     // No factory survived the filtering above.
     return ITensorHandleFactory::LegacyFactoryId;
 }

References ARMNN_ASSERT_MSG, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), OutputSlot::GetConnections(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), IBackendInternal::GetHandleFactoryPreferences(), Layer::GetInputSlots(), OutputSlot::GetOwningLayer(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, RequiresCopy(), and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

◆ CalculateSlotOptionForInput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForInput	(	BackendsMap &	backends,
		OutputSlot &	slot,
		TensorHandleFactoryRegistry &	registry,
		bool	importEnabled
	)

Definition at line 1478 of file Network.cpp.

 {
     Layer& layer = slot.GetOwningLayer();
     ARMNN_ASSERT(layer.GetType() == LayerType::Input);
  
     // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
     // doesn't matter which backend it is assigned to because they all use the same implementation, which
     // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
     // select a factory with maximum compatibility with the layers connected to the InputLayer.
  
     // First ensure the from backends can support the TensorHandeAPI
     auto frmBackend = backends.find(layer.GetBackendId());
     if (frmBackend == backends.end() ||
         !frmBackend->second->SupportsTensorAllocatorAPI())
     {
         return ITensorHandleFactory::LegacyFactoryId;
     }
  
     // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
     // fewest copies.
     std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
     int topScore = 0;
     ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
  
     for (auto&& connection : slot.GetConnections())
     {
  
         const Layer& connectedLayer = connection->GetOwningLayer();
  
         auto toBackend = backends.find(connectedLayer.GetBackendId());
         ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
  
         if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
         {
             // The destination backend does not support the tensor allocator API, move to the next one
             continue;
         }
  
         auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
         for (auto&& dst : dstPrefs)
         {
             // Input layers use the mem copy workload or import, so the selected factory must
             // support either the map/unmap API or Import API
             ITensorHandleFactory* factory = registry.GetFactory(dst);
             if (importEnabled && factory->GetImportFlags() == 0)
             {
                 continue;
             }
             else if (!importEnabled && !factory->SupportsMapUnmap())
             {
                 continue;
             }
  
             auto it = factoryScores.find(dst);
             if (it == factoryScores.end())
             {
                 // Add new score to the table
                 factoryScores[dst] = 0;
                 if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                 {
                     topChoice = dst;
                 }
             }
             else
             {
                 // Increase the score
                 factoryScores[dst]++;
  
                 // Track the best option
                 if (factoryScores[dst] > topScore)
                 {
                     topScore = factoryScores[dst];
                     topChoice = dst;
                 }
             }
         }
     }
  
     return topChoice;
 }

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, Layer::GetBackendId(), OutputSlot::GetConnections(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), OutputSlot::GetOwningLayer(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

◆ CalculateSlotOptionForOutput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForOutput	(	BackendsMap &	backends,
		OutputSlot &	slot,
		TensorHandleFactoryRegistry &	registry
	)

Definition at line 1563 of file Network.cpp.

 {
     IgnoreUnused(backends, slot, registry);
     return ITensorHandleFactory::DeferredFactoryId;
 }

References ITensorHandleFactory::DeferredFactoryId, and IgnoreUnused().

Referenced by SelectTensorHandleStrategy().

◆ ChainReduceLayers()

std::vector<IConnectableLayer*> armnn::ChainReduceLayers	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ReduceDescriptor &	desc
	)

Definition at line 279 of file ArmComputeSubgraphUtils.hpp.

 {
     // Vector of new chained layers, used for substitution.
     std::vector<IConnectableLayer*> layers;
  
     // Vector of axes so each layer is reshaped correctly.
     std::vector<uint32_t> axes;
     unsigned int recalulatedAxis = 0;
  
     for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
     {
         // Get TensorInfo from base layer and reduce shape using axis.
         TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
  
         axes.emplace_back(desc.m_vAxis[i]);
  
         const TensorInfo& reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
                                                                           axes,
                                                                           desc.m_KeepDims);
  
         // Create a vector for the single axis to be assigned to the descriptor.
         // Update axis if keepDims is set reduce layers correctly.
         std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalulatedAxis);
  
         // Create a descriptor and assign single axis.
         ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
         newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
  
         // Add new layer to graph.
         std::string layerName = "reduce_layer_" + std::to_string(i);
  
         Layer* replacementLayer = PolymorphicDowncast<Layer*>(
             optimizationViews.GetINetwork()->AddReduceLayer(newReduceDescriptor,
                                                             layerName.c_str()));
  
         // Connect previous layer with new layer.
         // The first and last layer will be connected when the subgraph is replaced.
         if (!layers.empty())
         {
             layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
         }
  
         // Set updated tensorInfo for new layer.
         replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
  
         if (!desc.m_KeepDims)
         {
             recalulatedAxis++;
         }
  
         layers.emplace_back(replacementLayer);
     }
  
     // Check if the TensorInfo from the last layer equals the inferred output from the original layer.
     ARMNN_ASSERT(baseLayer->GetOutputSlot(0).GetTensorInfo() ==
                  PolymorphicDowncast<Layer*>(layers.back())->GetOutputSlot().GetTensorInfo());
  
     return layers;
 }

References INetwork::AddReduceLayer(), ARMNN_ASSERT, ComputeReductionTensorShape(), OptimizationViews::GetINetwork(), Layer::GetInputSlot(), Layer::GetOutputSlot(), ReduceDescriptor::m_KeepDims, ReduceDescriptor::m_vAxis, and OutputSlot::SetTensorInfo().

◆ CheckFlag()

bool armnn::CheckFlag	(	MemorySourceFlags	flags,
		MemorySource	source
	)

inline

Definition at line 41 of file MemorySources.hpp.

 {
     return (static_cast<MemorySourceFlags>(source) & flags) != 0;
 }

Referenced by LoadedNetwork::ImportInputs(), and LoadedNetwork::ImportOutputs().

◆ CheckFp16Support()

bool armnn::CheckFp16Support	(	BackendsMap &	backends,
		const std::vector< BackendId > &	availablePreferredBackends
	)

Definition at line 1029 of file Network.cpp.

 {
     bool hasFp16 = false;
     // Check if the first preferred backend has FP16 support
     auto firstBackend = availablePreferredBackends[0];
     auto backendObjPtr = backends.find(firstBackend)->second.get();
     ARMNN_ASSERT(backendObjPtr);
     auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
     auto backendCapabilities = backendObjPtr->GetCapabilities();
  
     if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
     {
         // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled.
         hasFp16 = true;
         ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
                          << ", has FP16 support.";
     }
     else
     {
         ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
                            << ", does not have FP16 support. "
                            << "The FP16 turbo mode option will be disable. It will run using FP32.";
     }
  
     // Check if the rest of the available preferred backends have FP16 support
     for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
     {
         auto backend = availablePreferredBackends[i];
         backendObjPtr = backends.find(backend)->second.get();
         backendCapabilities = backendObjPtr->GetCapabilities();
         if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
         {
             ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
                                << "It will run using FP32 when falling back to this backend.";
         }
         else
         {
             ARMNN_LOG(debug) << "Next preferred backend:  " << backend << ", has FP16 support.";
         }
     }
  
     return hasFp16;
 }

References ARMNN_ASSERT, ARMNN_LOG, debug, HasMatchingCapability(), and warning.

Referenced by Optimize().

◆ CheckScaleSetOnQuantizedType()

bool armnn::CheckScaleSetOnQuantizedType	(	Layer *	layer,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 795 of file Network.cpp.

 {
     // Validates the quantization parameters on every output slot of `layer`.
     // Returns false if any quantized output has a zero scale (each such output is
     // reported through ReportError); returns true otherwise.
     // Throws InvalidArgumentException when an asymmetric quantized type
     // (QAsymmU8 / QAsymmS8) carries per-axis quantization.
     // Side effect: a QAsymmU8 Softmax output whose scale/offset differ from
     // 1/256 and 0 is rewritten on the slot to those values, with a warning logged.
     bool noErrors = true;
     unsigned int numOutputs = layer->GetNumOutputSlots();
     for (unsigned int i = 0; i < numOutputs; i++) {
         OutputSlot& outputSlot = layer->GetOutputSlot(i);
         // `info` is a local copy; changes only reach the slot through SetTensorInfo() below.
         TensorInfo info = outputSlot.GetTensorInfo();
         auto quantizationDataType = info.GetDataType();
         auto quantizationScales = info.GetQuantizationScales();
         // For any Quantized Tensor ensure scale(s) are set
         switch(quantizationDataType) {
             case DataType::QAsymmU8:
             case DataType::QSymmS16:
             case DataType::QSymmS8:
             case DataType::QAsymmS8:
                 // Per-axis quantization combined with an asymmetric type is rejected outright.
                 if ((quantizationDataType == DataType::QAsymmU8 || quantizationDataType == DataType::QAsymmS8)
                     && info.HasPerAxisQuantization()) {
                     throw InvalidArgumentException("Per Axis Quantization is not supported in "
                                                    "Asymmetric Quantization Datatype.");
                 }
                 // A scale of exactly 0 is treated as "not set": the single scale for
                 // per-tensor quantization, or any element of the scales for per-axis.
                 if ((!info.HasPerAxisQuantization() && info.GetQuantizationScale() == 0.f)
                     || (info.HasPerAxisQuantization() && (quantizationScales.end() !=
                     std::find(quantizationScales.begin(), quantizationScales.end(), 0.f)))) {
                     noErrors = false;
                     std::stringstream ss;
                     ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                        << " (" << layer->GetNameStr() << ") is of type"
                        << " Quantized value but the scale parameter has not been set";
                     ReportError(ss.str(), errMessages);
                 }
                 // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
                 // This case is corrected in place and only logged; it does not clear noErrors.
                 if (!info.HasPerAxisQuantization() && quantizationDataType == DataType::QAsymmU8 &&
                     (info.GetQuantizationScale() != (1.0f / 256.0f) ||
                      info.GetQuantizationOffset() != 0) &&
                     layer->GetType() == armnn::LayerType::Softmax) {
                     std::stringstream ss;
                     ss << "Quantization parameters for Softmax layer (Scale: " <<
                        info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                        ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                     ARMNN_LOG(warning) << ss.str();
                     info.SetQuantizationScale((1.0f / 256.0f));
                     info.SetQuantizationOffset(0);
                     outputSlot.SetTensorInfo(info);
                 }
                 break;
             default:
                 // Other data types are not checked.
                 break;
         }
     }
     return noErrors;
 }

References ARMNN_LOG, GetLayerTypeAsCString(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Layer::GetType(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, ReportError(), OutputSlot::SetTensorInfo(), Softmax, and warning.

Referenced by AssignBackendsIConnectable().

◆ CheckSupportRule()

bool armnn::CheckSupportRule	(	F	rule,
		Optional< std::string & >	reasonIfUnsupported,
		const char *	reason
	)

Definition at line 38 of file LayerSupportRules.hpp.

 {
     bool supported = rule();
     if (!supported && reason)
     {
         reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
     }
     return supported;
 }

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

◆ ClAbsWorkloadValidate()

arm_compute::Status ClAbsWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 19 of file ClAbsWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClActivationWorkloadValidate()

arm_compute::Status ClActivationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ActivationDescriptor &	descriptor
	)

Definition at line 17 of file ClActivationWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
  
     return arm_compute::CLActivationLayer::validate(&aclInput,
                                                     &aclOutput,
                                                     activationLayerInfo);
 }

Referenced by ClLayerSupport::IsActivationSupported().

◆ ClAdditionValidate()

arm_compute::Status ClAdditionValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 45 of file ClAdditionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
                                                                                       &aclInput1Info,
                                                                                       &aclOutputInfo,
                                                                                       g_AclConvertPolicy,
                                                                                       activationInfo);
  
     return aclStatus;
 }

Referenced by ClLayerSupport::IsAdditionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClArgMinMaxWorkloadValidate()

arm_compute::Status ClArgMinMaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ArgMinMaxDescriptor &	descriptor
	)

Definition at line 31 of file ClArgMinMaxWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     auto numDims = input.GetNumDimensions();
     auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
     int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
  
     if (descriptor.m_Function == ArgMinMaxFunction::Max)
     {
         return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
                                                        arm_compute::ReductionOperation::ARG_IDX_MAX);
     }
     else
     {
         return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
                                                        arm_compute::ReductionOperation::ARG_IDX_MIN);
     }
 }

Referenced by ClLayerSupport::IsArgMinMaxSupported().

◆ ClBackendId()

constexpr const char* armnn::ClBackendId ( )

constexpr

Definition at line 10 of file ClBackendId.hpp.

// Returns the string literal "GpuAcc".
10 { return "GpuAcc"; }

Referenced by ClBackend::GetIdStatic().

◆ ClBatchMatMulValidate()

arm_compute::Status ClBatchMatMulValidate	(	const TensorInfo &	inputInfoX,
		const TensorInfo &	inputInfoY,
		const TensorInfo &	outputInfo,
		const BatchMatMulDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 24 of file ClBatchMatMulWorkload.cpp.

 {
     if (descriptor.m_AdjointX || descriptor.m_AdjointY )
     {
         throw Exception("Support for adjoint not implemented.");
     }
     if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
     {
         throw Exception("Only supported the MatMul in the last 2 dimensions");
     }
  
     arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
     arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
  
     // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
     aclInputInfoX.set_are_values_constant(false);
     aclInputInfoY.set_are_values_constant(false);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     arm_compute::MatMulInfo matMulInfo;
     matMulInfo.adj_lhs(descriptor.m_TransposeX);
     matMulInfo.adj_rhs(descriptor.m_TransposeY);
  
     return arm_compute::CLMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, activationInfo);
 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, and NCHW.

Referenced by ClLayerSupport::IsBatchMatMulSupported().

◆ ClBatchNormalizationValidate()

arm_compute::Status ClBatchNormalizationValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TensorInfo &	mean,
		const TensorInfo &	var,
		const TensorInfo &	beta,
		const TensorInfo &	gamma,
		const BatchNormalizationDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 19 of file ClBatchNormalizationFloatWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclMeanInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclVarInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclBetaInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclGammaInfo =
         armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
                                                             &aclOutputInfo,
                                                             &aclMeanInfo,
                                                             &aclVarInfo,
                                                             &aclBetaInfo,
                                                             &aclGammaInfo,
                                                             descriptor.m_Eps,
                                                             activationInfo);
 }

Referenced by ClLayerSupport::IsBatchNormalizationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClBatchToSpaceNdWorkloadValidate()

arm_compute::Status ClBatchToSpaceNdWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const BatchToSpaceNdDescriptor &	descriptor
	)

Definition at line 17 of file ClBatchToSpaceNdWorkload.cpp.

 {
     // Validates a BatchToSpaceNd configuration for the CL backend. For rank-3 inputs two extra
     // CLReshapeLayer validations are run in addition to the CLBatchToSpaceLayer validation; the
     // three results are merged into a single Status at the end.
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // All three statuses start as OK so that the reshape ones stay neutral when rank != 3.
     arm_compute::Status statusBatchToSpace  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeInput  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
  
     // NOTE(review): these copies are taken before the rank-3 branch below rewrites the shapes of
     // aclInputInfo/aclOutputInfo, so for rank 3 they keep the original shapes while
     // aclInputInfo/aclOutputInfo receive the 4D shapes. The CLBatchToSpaceLayer validation further
     // down is then given these unmodified copies when rank == 3 - confirm this is intended.
     arm_compute::TensorInfo aclReshapeInputInfo  = aclInputInfo;
     arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
  
     // When a spatial dimension is missing (rank=3) set W to 1
     const unsigned int rank = input.GetNumDimensions();
     if (rank == 3)
     {
         const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
         const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
  
         if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
         {
             // In ACL dimensions are right to left: C, W, H, N
             aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
             aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
         }
         else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
         {
             // In ACL dimensions are right to left: W, H, C, N
             aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
             aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
         }
         else
         {
             throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
         }
  
         // Validate the reshapes on either side of the BatchToSpace operation.
         statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
         statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
     }
  
     // ArmNN blockShape is [H, W] ACL asks for W, H
     int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
     // For rank 3 the block width is fixed to 1 and m_BlockShape[1] is not read.
     int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
  
     const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
  
     statusBatchToSpace = arm_compute::CLBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
                                                                     blockWidth,
                                                                     blockHeight,
                                                                     rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
                                                                     cropInfo);
  
     // Success requires every individual validation to have succeeded.
     if (statusReshapeInput.error_code()  == arm_compute::ErrorCode::OK &&
         statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
         statusBatchToSpace.error_code()  == arm_compute::ErrorCode::OK)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All BatchToSpace layers validate status OK.");
     }
     else
     {
         // The individual error descriptions are appended back to back, without any separator.
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "BatchToSpace layer validate status failed."
                                    + statusBatchToSpace.error_description()
                                    + statusReshapeInput.error_description()
                                    + statusReshapeOutput.error_description());
     }
 }

Referenced by ClLayerSupport::IsBatchToSpaceNdSupported().

◆ ClCastValidate()

arm_compute::Status ClCastValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 20 of file ClCastWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLCast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
 }

Referenced by ClLayerSupport::IsCastSupported().

◆ ClChannelShuffleValidate()

arm_compute::Status ClChannelShuffleValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ChannelShuffleDescriptor &	descriptor
	)

Definition at line 20 of file ClChannelShuffleWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
     // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
     // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
     arm_compute::DataLayout aclDataLayout;
     if (input.GetNumDimensions() == 4)
     {
         switch (descriptor.m_Axis)
         {
             case 1:
                 aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
                 break;
             case 3:
                 aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
                 break;
             default:
                 return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
         }
         aclInputInfo.set_data_layout(aclDataLayout);
         aclOutputInfo.set_data_layout(aclDataLayout);
         return arm_compute::CLChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
     }
     else
     {
         return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
     }
 }

Referenced by ClLayerSupport::IsChannelShuffleSupported().

◆ ClComparisonWorkloadValidate()

arm_compute::Status ClComparisonWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ComparisonDescriptor &	descriptor
	)

Definition at line 24 of file ClComparisonWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
  
     const arm_compute::Status aclStatus = arm_compute::CLComparison::validate(&aclInput0Info,
                                                                               &aclInput1Info,
                                                                               &aclOutputInfo,
                                                                               comparisonOperation);
     return aclStatus;
 }

Referenced by ClLayerSupport::IsComparisonSupported().

◆ ClConcatWorkloadValidate()

arm_compute::Status ClConcatWorkloadValidate	(	const std::vector< const TensorInfo * > &	inputs,
		const TensorInfo &	output,
		const OriginsDescriptor &	descriptor
	)

Definition at line 27 of file ClConcatWorkload.cpp.

 {
     std::vector<arm_compute::TensorInfo> aclInputs;
     for (const TensorInfo* input : inputs)
     {
         arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
         aclInputs.emplace_back(aclInputInfo);
     }
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
     std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
     for (arm_compute::ITensorInfo& input : aclInputs)
     {
         aclInputPtrs.emplace_back(&input);
     }
  
     size_t aclAxis = CalcAxis(descriptor);
     return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
 }

Referenced by ClLayerSupport::IsConcatSupported().

◆ ClConstantWorkloadValidate()

arm_compute::Status ClConstantWorkloadValidate ( const TensorInfo & output )

Definition at line 18 of file ClConstantWorkload.cpp.

 {
     const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     std::array<arm_compute::DataType,8> supportedTypes = {
             arm_compute::DataType::F16,
             arm_compute::DataType::F32,
             arm_compute::DataType::QASYMM8,
             arm_compute::DataType::QASYMM8_SIGNED,
             arm_compute::DataType::QSYMM16,
             arm_compute::DataType::QSYMM8,
             arm_compute::DataType::QSYMM8_PER_CHANNEL,
             arm_compute::DataType::S32
     };
     auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
  
     if (it != end(supportedTypes))
     {
         return arm_compute::Status{};
     }
     else
     {
         return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
     }
 }

Referenced by ClLayerSupport::IsConstantSupported().

◆ ClContextBufferHasIdentifier()

bool armnn::ClContextBufferHasIdentifier ( const void * buf )

inline

Definition at line 152 of file ClContextSchema_generated.h.

                                                           {
   // Delegates to flatbuffers::BufferHasIdentifier, passing buf together with the
   // identifier string returned by ClContextIdentifier().
   return flatbuffers::BufferHasIdentifier(
       buf, ClContextIdentifier());
 }

References ClContextIdentifier().

◆ ClContextExtension()

const char* armnn::ClContextExtension ( )

inline

Definition at line 167 of file ClContextSchema_generated.h.

                                         {
   // Returns the string literal "armnn". Presumably this is the file extension used for
   // serialized ClContext files - not verifiable from this listing, confirm against the schema.
   return "armnn";
 }

◆ ClContextIdentifier()

const char* armnn::ClContextIdentifier ( )

inline

Definition at line 148 of file ClContextSchema_generated.h.

                                          {
   // Returns the string literal "ARMN". Per the cross-references of this listing it is consumed
   // by ClContextBufferHasIdentifier() and the Finish*/Verify* ClContext buffer helpers.
   return "ARMN";
 }

Referenced by ClContextBufferHasIdentifier(), FinishClContextBuffer(), FinishSizePrefixedClContextBuffer(), VerifyClContextBuffer(), and VerifySizePrefixedClContextBuffer().

◆ ClConvertFp16ToFp32WorkloadValidate()

arm_compute::Status ClConvertFp16ToFp32WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 44 of file ClConvertFp16ToFp32Workload.cpp.

 {
     if (input.GetDataType() != DataType::Float16)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
     }
     if (output.GetDataType() != DataType::Float32)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
     }
  
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
         &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
  
     return aclStatus;
 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp16ToFp32Supported().

◆ ClConvertFp32ToFp16WorkloadValidate()

arm_compute::Status ClConvertFp32ToFp16WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 44 of file ClConvertFp32ToFp16Workload.cpp.

 {
     if (input.GetDataType() != DataType::Float32)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
     }
     if (output.GetDataType() != DataType::Float16)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
     }
  
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
         &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
  
     return aclStatus;
 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp32ToFp16Supported().

◆ ClConvolution2dWorkloadValidate()

arm_compute::Status ClConvolution2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Convolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 23 of file ClConvolution2dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if (!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN ClConvolution2dWorkload has empty bias value."};
         }
         // There's currently a problem with non const bias, so we'll explicitly block it here.
         if (!biases.value().IsConstant())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
                                                      &aclWeightsInfo,
                                                      optionalAclBiasesInfo,
                                                      &aclOutputInfo,
                                                      layerInfo,
                                                      arm_compute::WeightsInfo(),
                                                      aclDilationInfo,
                                                      activationInfo,
                                                      isFastMathEnabled);
 }

Referenced by ClLayerSupport::IsConvolution2dSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClConvolution3dWorkloadValidate()

arm_compute::Status ClConvolution3dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Convolution3dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 23 of file ClConvolution3dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         if (!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN ClConvolution3dWorkload has empty bias value."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
                                                                     isFastMathEnabled,
                                                                     activationDescriptor);
  
     return arm_compute::CLConv3D::validate(&aclInputInfo,
                                            &aclWeightsInfo,
                                            optionalAclBiasesInfo,
                                            &aclOutputInfo,
                                            aclConv3DInfo);
 }

Referenced by ClLayerSupport::IsConvolution3dSupported().

◆ ClDepthToSpaceWorkloadValidate()

arm_compute::Status ClDepthToSpaceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const DepthToSpaceDescriptor &	descriptor
	)

Definition at line 22 of file ClDepthToSpaceWorkload.cpp.

 {
     DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
  
     int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
  
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
  
     const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo,
                                                                                      &aclOutputInfo,
                                                                                      blockSize);
     return aclStatus;
 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsDepthToSpaceSupported().

◆ ClDepthwiseConvolutionWorkloadValidate()

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const DepthwiseConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 26 of file ClDepthwiseConvolutionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
     //
     // ACL format for weights for depthwise is:
     // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
     // - [1, C, H, W] for [N, C, H, W] input/output layout
     //
     // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
     // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
     // so we do the permute here for the TensorInfo weights.
     unsigned int aclDepthMultiplier;
     TensorInfo weightsPermuted;
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
  
     // Convert the weights into the compute library format
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         if (!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN ClDepthwiseConv2dWorkload has empty bias value."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
             descriptor.m_DilationX,
             descriptor.m_DilationY);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
                                                               &aclWeightsInfo,
                                                               optionalAclBiasesInfo,
                                                               &aclOutputInfo,
                                                               aclPadStrideInfo,
                                                               aclDepthMultiplier,
                                                               activationInfo,
                                                               aclDilationInfo);
  
 }

Referenced by ClLayerSupport::IsDepthwiseConvolutionSupported(), ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClDequantizeWorkloadValidate()

arm_compute::Status ClDequantizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 22 of file ClDequantizeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsDequantizeSupported().

◆ ClDivisionWorkloadValidate()

arm_compute::Status ClDivisionWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 18 of file ClDivisionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
 }

Referenced by ClLayerSupport::IsDivisionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClElementwiseBinaryValidate()

arm_compute::Status ClElementwiseBinaryValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ElementwiseBinaryDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 64 of file ClElementwiseBinaryWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     switch (descriptor.m_Operation)
     {
         case armnn::BinaryOperation::Power:
             return arm_compute::CLElementwisePower::validate(&aclInput0Info,
                                                              &aclInput1Info,
                                                              &aclOutputInfo,
                                                              activationInfo);
         case armnn::BinaryOperation::SqDiff:
             return arm_compute::CLElementwiseSquaredDiff::validate(&aclInput0Info,
                                                                    &aclInput1Info,
                                                                    &aclOutputInfo,
                                                                    activationInfo);
         default:
             throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
     }
 }

Referenced by ClLayerSupport::IsLayerSupported().

◆ ClExpWorkloadValidate()

arm_compute::Status ClExpWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file ClExpWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClFloorWorkloadValidate()

arm_compute::Status ClFloorWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 14 of file ClFloorFloatWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsFloorSupported().

◆ ClFullyConnectedWorkloadValidate()

arm_compute::Status ClFullyConnectedWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		const FullyConnectedDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 19 of file ClFullyConnectedWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
     arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
     aclWeights.set_are_values_constant(weights.IsConstant());
  
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
         aclBiases = BuildArmComputeTensorInfo(biases.value());
         aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
  
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
  
     return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
                                                         &aclOutput,
                                                         fullyConnectedLayerInfo);
 }

Referenced by ClLayerSupport::IsFullyConnectedSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClGatherNdWorkloadValidate()

arm_compute::Status ClGatherNdWorkloadValidate	(	const TensorInfo &	paramsInfo,
		const TensorInfo &	indicesInfo,
		const TensorInfo &	outputInfo
	)

Validate Mul

Validate ReduceSum

Validate Gather

Validate Reshape

Return OK if all the layers are valid

Definition at line 16 of file ClGatherNdWorkload.cpp.

 {
     // Calculate ND, K, W, C.
     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
  
     /// Validate Mul
     // Indices with shape { W, ND }
     armnn::TensorInfo indices_W_ND_Info = indicesInfo;
     indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
     const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
  
     // Flattened coefficients with shape { ND }
     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
     const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
  
     // Output of Mul with shape { W, ND }
     const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
  
     auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
                                                                       &aclFlattenedCoeffInfo,
                                                                       &aclOutputMulInfo,
                                                                       1.0f,
                                                                       arm_compute::ConvertPolicy::WRAP,
                                                                       arm_compute::RoundingPolicy::TO_ZERO,
                                                                       arm_compute::ActivationLayerInfo());
  
     /// Validate ReduceSum
     // Flattened indices with shape { W }
     armnn::TensorInfo flattenedIndices_Info = indicesInfo;
     flattenedIndices_Info.SetShape({ keyIndices["W"] });
     const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
  
     const std::vector<unsigned int> armnnReduceAxes(1, 1);
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
                                                                           indices_W_ND_Info.GetNumDimensions(),
                                                                           armnnReduceAxes);
  
     auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
                                                                        &aclFlattenedIndicesInfo,
                                                                        static_cast<unsigned int>(coords[0]),
                                                                        arm_compute::ReductionOperation::SUM,
                                                                        false);
  
     /// Validate Gather
     // Params with shape { K, C }
     armnn::TensorInfo params_K_C_Info =  paramsInfo;
     params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
     const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
  
     // Output of gather with shape { W, C }
     armnn::TensorInfo outputGather_Info = outputInfo;
     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
     const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
  
     auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
     auto statusGather =
             arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
  
     /// Validate Reshape
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
  
     auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
  
     /// Return OK if all the layers are valid
     auto okCode = arm_compute::ErrorCode::OK;
     if (statusMul.error_code()       == okCode &&
         statusReduceSum.error_code() == okCode &&
         statusGather.error_code()    == okCode &&
         statusReshape.error_code()   == okCode)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All GatherND layers validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "GatherND layer validate status failed.");
     }
 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by ClLayerSupport::IsGatherNdSupported().

◆ ClGatherWorkloadValidate()

arm_compute::Status ClGatherWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	indices,
		const TensorInfo &	output,
		const GatherDescriptor &	descriptor
	)

Definition at line 15 of file ClGatherWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput   = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
     const arm_compute::TensorInfo aclOutput  = BuildArmComputeTensorInfo(output);
  
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
  
     return arm_compute::CLGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
 }

Referenced by ClLayerSupport::IsGatherSupported().

◆ ClImportTensorHandleFactoryId()

constexpr const char* armnn::ClImportTensorHandleFactoryId ( )

constexpr

Definition at line 15 of file ClImportTensorHandleFactory.hpp.

 {
     return "Arm/Cl/ImportTensorHandleFactory";
 }

Referenced by ClImportTensorHandleFactory::GetIdStatic().

◆ ClInstanceNormalizationWorkloadValidate()

arm_compute::Status ClInstanceNormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const InstanceNormalizationDescriptor &	descriptor
	)

Definition at line 18 of file ClInstanceNormalizationWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo,
                                                                &aclOutputInfo,
                                                                descriptor.m_Gamma,
                                                                descriptor.m_Beta,
                                                                descriptor.m_Eps);
 }

Referenced by ClLayerSupport::IsInstanceNormalizationSupported().

◆ ClL2NormalizationWorkloadValidate()

arm_compute::Status ClL2NormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const L2NormalizationDescriptor &	descriptor
	)

Definition at line 17 of file ClL2NormalizationFloatWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
  
     return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
 }

Referenced by ClLayerSupport::IsL2NormalizationSupported().

◆ ClLogicalAndWorkloadValidate()

arm_compute::Status ClLogicalAndWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 20 of file ClLogicalAndWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLLogicalAnd::validate(&aclInputInfo0,
                                                                               &aclInputInfo1,
                                                                               &aclOutputInfo);
     return aclStatus;
 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogicalNotWorkloadValidate()

arm_compute::Status ClLogicalNotWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 20 of file ClLogicalNotWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLLogicalNot::validate(&aclInputInfo,
                                                                               &aclOutputInfo);
     return aclStatus;
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLogicalOrWorkloadValidate()

arm_compute::Status ClLogicalOrWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 20 of file ClLogicalOrWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLLogicalOr::validate(&aclInputInfo0,
                                                                              &aclInputInfo1,
                                                                              &aclOutputInfo);
     return aclStatus;
 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogSoftmaxWorkloadValidate()

arm_compute::Status ClLogSoftmaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const LogSoftmaxDescriptor &	descriptor
	)

Definition at line 17 of file ClLogSoftmaxWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
 }

Referenced by ClLayerSupport::IsLogSoftmaxSupported().

◆ ClLogWorkloadValidate()

arm_compute::Status ClLogWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file ClLogWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLLogLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLstmFloatWorkloadValidate()

arm_compute::Status ClLstmFloatWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	scratchBuffer,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const LstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 244 of file ClLstmFloatWorkload.cpp.

 {
     // Overview: build an ACL TensorInfo for every input, output and weight tensor, register the
     // optional parameter groups selected by the descriptor in lstm_params_info, and return the
     // status reported by arm_compute::CLLSTMLayer::validate.
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
  
     // The inputs and the outputs
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // Basic parameters
     // These nine tensors are converted unconditionally, whatever the descriptor flags say.
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     // Optional tensor infos. They are declared at function scope, outside the conditionals below,
     // because their addresses are passed to lstm_params_info inside those conditionals and
     // lstm_params_info is consumed by validate() at the end of the function.
     // NOTE(review): presumably lstm_params_info keeps these pointers rather than copying - confirm.
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclInputGateBiasInfo;
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
     // The input-gate tensors are only read and registered when CIFG is disabled.
     if (!descriptor.m_CifgEnabled)
     {
         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
  
         // Cell-to-input weights are optional: converted only when present, otherwise nullptr is passed below.
         if (paramsInfo.m_CellToInputWeights != nullptr)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
                                          paramsInfo.m_CellToInputWeights != nullptr ?
                                          &aclCellToInputWeightsInfo: nullptr,
                                          &aclInputGateBiasInfo);
     }
  
     // Projection weights are registered when projection is enabled; the projection bias is optional
     // and replaced by nullptr when paramsInfo does not provide one.
     if (descriptor.m_ProjectionEnabled)
     {
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                                paramsInfo.m_ProjectionBias != nullptr ?
                                                &aclProjectionBiasInfo: nullptr);
     }
  
     // Peephole: only the cell-to-forget and cell-to-output weights are registered here; the
     // cell-to-input weights are handled in the CIFG branch above.
     if (descriptor.m_PeepholeEnabled)
     {
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
     }
  
     // Clipping thresholds are taken from the descriptor and passed to validate() unchanged.
     float cell_threshold = descriptor.m_ClippingThresCell;
     float projection_threshold = descriptor.m_ClippingThresProj;
  
     // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
     // (the mapping itself is delegated to ConvertLstmActivationFuncToAclLayerInfo).
     arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
  
     // Layer normalisation: forget, cell and output weights are always read when enabled; the input
     // weights are read only when CIFG is disabled, and nullptr is registered in their place otherwise.
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo  = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
  
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
  
         aclCellLayerNormWeightsInfo   = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
  
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
                                                         nullptr : &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
     }
  
     // Return ACL's validation status for the full LSTM configuration as-is.
     return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
                                               &aclInputToCellWeightsInfo,
                                               &aclInputToOutputWeightsInfo,
                                               &aclRecurrentToForgetWeightsInfo,
                                               &aclRecurrentToCellWeightsInfo,
                                               &aclRecurrentToOutputWeightsInfo,
                                               &aclForgetGateBiasInfo,
                                               &aclCellBiasInfo,
                                               &aclOutputGateBiasInfo,
                                               &aclOutputStateInInfo, &aclCellStateInInfo,
                                               &aclScratchBufferInfo, &aclOutputStateOutInfo,
                                               &aclCellStateOutInfo, &aclOutputInfo,
                                               lstm_params_info, activationLayerInfo,
                                               cell_threshold, projection_threshold);
 }

Referenced by ClLayerSupport::IsLstmSupported().

◆ ClMaximumWorkloadValidate()

arm_compute::Status ClMaximumWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 24 of file ClMaximumWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info,
                                                                                   &aclInput1Info,
                                                                                   &aclOutputInfo);
  
     return aclStatus;
 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMaximumSupported().

◆ ClMeanValidate()

arm_compute::Status ClMeanValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const MeanDescriptor &	descriptor
	)

Definition at line 17 of file ClMeanWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           descriptor.m_Axis);
  
     return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsMeanSupported().

◆ ClMinimumWorkloadValidate()

arm_compute::Status ClMinimumWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 24 of file ClMinimumWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info,
                                                                                   &aclInput1Info,
                                                                                   &aclOutputInfo);
  
     return aclStatus;
 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMinimumSupported().

◆ ClMultiplicationWorkloadValidate()

arm_compute::Status ClMultiplicationWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 18 of file ClMultiplicationWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
     return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
                                                             &aclInput2,
                                                             &aclOutput,
                                                             1.0f,
                                                             convertPolicy,
                                                             arm_compute::RoundingPolicy::TO_ZERO,
                                                             activationInfo);
 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsMultiplicationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClNegWorkloadValidate()

arm_compute::Status ClNegWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file ClNegWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClNormalizationWorkloadValidate()

arm_compute::Status ClNormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const NormalizationDescriptor &	descriptor
	)

Definition at line 19 of file ClNormalizationFloatWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
  
     return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
 }

Referenced by ClLayerSupport::IsNormalizationSupported().

◆ ClPadValidate()

arm_compute::Status ClPadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const PadDescriptor &	descriptor
	)

Definition at line 62 of file ClPadWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
  
     std::reverse_copy(std::begin(descriptor.m_PadList),
                       std::end(descriptor.m_PadList),
                       std::begin(reversed_PadList));
  
     arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
  
     // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
     arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
     const arm_compute::Status aclStatus =
             arm_compute::CLPadLayer::validate(&aclInputInfo,
                                               &aclOutputInfo,
                                               padList,
                                               pixelValue,
                                               ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
  
     return aclStatus;
 }

Referenced by ClLayerSupport::IsPadSupported().

◆ ClPermuteWorkloadValidate()

arm_compute::Status ClPermuteWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const PermuteDescriptor &	descriptor
	)

Definition at line 17 of file ClPermuteWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
     const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
  
     return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
                                             armcomputetensorutils::BuildArmComputePermutationVector(mappings));
 }

Referenced by ClLayerSupport::IsPermuteSupported().

◆ ClPooling2dWorkloadValidate()

arm_compute::Status ClPooling2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Pooling2dDescriptor &	descriptor
	)

Definition at line 18 of file ClPooling2dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
  
     return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
 }

Referenced by ClLayerSupport::IsPooling2dSupported().

◆ ClPooling3dWorkloadValidate()

arm_compute::Status ClPooling3dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Pooling3dDescriptor &	descriptor
	)

Definition at line 18 of file ClPooling3dWorkload.cpp.

     {
         const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
         const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
         arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
  
         return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
     }

Referenced by ClLayerSupport::IsPooling3dSupported().

◆ ClPreluWorkloadValidate()

arm_compute::Status ClPreluWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	alpha,
		const TensorInfo &	output
	)

Definition at line 16 of file ClPreluWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLPReluLayer::validate(&aclInput,
                                                &aclAlpha,
                                                &aclOutput);
 }

Referenced by ClLayerSupport::IsPreluSupported().

◆ ClQLstmWorkloadValidate()

arm_compute::Status ClQLstmWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	output,
		const QLstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 247 of file ClQLstmWorkload.cpp.

 {
     arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
  
     // Input/Output tensor info
     const arm_compute::TensorInfo aclInputInfo         = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo   = BuildArmComputeTensorInfo(cellStateIn);
  
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo   = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputInfo         = BuildArmComputeTensorInfo(output);
  
     // Mandatory tensor info
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
         = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     // Optional tensor info
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
  
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputGateBiasInfo;
  
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
     // Create tensor info for optional params if they are enabled
     if (descriptor.m_PeepholeEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
  
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         // Set peephole params info
         aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
                                           &aclCellToOutputWeightsInfo);
     }
  
     if (descriptor.m_ProjectionEnabled)
     {
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
  
         // Set projection params info
         aclParamsInfo.set_projection_params(
             &aclProjectionWeightsInfo,
             paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
     }
  
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
  
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo   = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         // Set layer norm params info
         aclParamsInfo.set_layer_normalization_params(
             paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
             &aclForgetLayerNormWeightsInfo,
             &aclCellLayerNormWeightsInfo,
             &aclOutputLayerNormWeightsInfo);
     }
  
     if (!descriptor.m_CifgEnabled)
     {
         aclInputToInputWeightsInfo     = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo           = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         // Set CIFG params info
         aclParamsInfo.set_cifg_params(
             &aclInputToInputWeightsInfo,
             &aclRecurrentToInputWeightsInfo,
             paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
             &aclInputGateBiasInfo);
     }
  
     // Set scalar descriptor params
     aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
     aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
     aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
     aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
                                           descriptor.m_ForgetIntermediateScale,
                                           descriptor.m_CellIntermediateScale,
                                           descriptor.m_OutputIntermediateScale);
  
     // QLSTM CL validate
     return arm_compute::CLQLSTMLayer::validate(&aclInputInfo,
                                                &aclInputToForgetWeightsInfo,
                                                &aclInputToCellWeightsInfo,
                                                &aclInputToOutputWeightsInfo,
                                                &aclRecurrentToForgetWeightsInfo,
                                                &aclRecurrentToCellWeightsInfo,
                                                &aclRecurrentToOutputWeightsInfo,
                                                &aclForgetGateBiasInfo,
                                                &aclCellBiasInfo,
                                                &aclOutputGateBiasInfo,
                                                &aclCellStateInInfo,
                                                &aclOutputStateInInfo,
                                                &aclCellStateOutInfo,
                                                &aclOutputStateOutInfo,
                                                &aclOutputInfo,
                                                aclParamsInfo);
 }

Referenced by ClLayerSupport::IsQLstmSupported().

◆ ClQuantizedLstmWorkloadValidate()

arm_compute::Status ClQuantizedLstmWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	previousCellStateIn,
		const TensorInfo &	previousOutputIn,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const QuantizedLstmInputParamsInfo &	paramsInfo
	)

Definition at line 18 of file ClQuantizedLstmWorkload.cpp.

 {
     // Inputs
     const arm_compute::TensorInfo aclInputInfo               = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn);
     const arm_compute::TensorInfo aclPreviousOutputInInfo    = BuildArmComputeTensorInfo(previousOutputIn);
  
     // Outputs
     const arm_compute::TensorInfo aclCellStateOutInfo        = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputInfo              = BuildArmComputeTensorInfo(output);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToInputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclInputGateBiasInfo  = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
     const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo       = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo,
                                                        &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo,
                                                        &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo,
                                                        &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo,
                                                        &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo,
                                                        &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo,
                                                        &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo,
                                                        &aclCellStateOutInfo, &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsQuantizedLstmSupported().

◆ ClQuantizeWorkloadValidate()

arm_compute::Status ClQuantizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 22 of file ClQuantizeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLQuantizationLayer::validate(&aclInputInfo,
                                                       &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsQuantizeSupported().

◆ ClReduceWorkloadValidate()

arm_compute::Status ClReduceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ReduceDescriptor &	descriptor
	)

Definition at line 18 of file ClReduceWorkload.cpp.

 {
     if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
     {
         const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
         const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
         arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                               input.GetNumDimensions(),
                                                                               descriptor.m_vAxis);
  
         return arm_compute::CLReductionOperation::validate(&aclInputInfo,
                                                            &aclOutputInfo,
                                                            static_cast<unsigned int>(coords[0]),
                                                            ConvertReductionOperationToAcl(descriptor),
                                                            descriptor.m_KeepDims);
     }
     else
     {
         // Validate layer if there are multiple axes.
         arm_compute::Status status;
         IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
         return status;
     }
 }

References ReduceDescriptor::m_vAxis.

Referenced by ClLayerSupport::IsReduceSupported().

◆ ClReshapeWorkloadValidate()

arm_compute::Status ClReshapeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 15 of file ClReshapeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsReshapeSupported().

◆ ClResizeWorkloadValidate()

arm_compute::Status ClResizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ResizeDescriptor &	descriptor
	)

Definition at line 22 of file ClResizeWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
     aclInputInfo.set_data_layout(aclDataLayout);
     aclOutputInfo.set_data_layout(aclDataLayout);
  
     arm_compute::InterpolationPolicy aclInterpolationPolicy =
         ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
  
     arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
                                                                                  arm_compute::SamplingPolicy::TOP_LEFT;
  
     return arm_compute::CLScale::validate(&aclInputInfo,
                                           &aclOutputInfo,
                                           arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
                                                                        arm_compute::BorderMode::REPLICATE,
                                                                        arm_compute::PixelValue(0.f),
                                                                        samplingPolicy,
                                                                        true,
                                                                        descriptor.m_AlignCorners));
 }

Referenced by ClLayerSupport::IsResizeSupported().

◆ ClReverseV2WorkloadValidate()

arm_compute::Status ClReverseV2WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	axis,
		const TensorInfo &	output
	)

Definition at line 16 of file ClReverseV2Workload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
 }

Referenced by ClLayerSupport::IsReverseV2Supported().

◆ ClRsqrtWorkloadValidate()

arm_compute::Status ClRsqrtWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file ClRsqrtWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClSinWorkloadValidate()

arm_compute::Status ClSinWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file ClSinWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::CLSinLayer::validate(&aclInput, &aclOutput);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClSliceWorkloadValidate()

arm_compute::Status ClSliceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SliceDescriptor &	descriptor
	)

Definition at line 18 of file ClSliceWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
  
     std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size);
  
     return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
 }

Referenced by ClLayerSupport::IsSliceSupported().

◆ ClSoftmaxWorkloadValidate()

arm_compute::Status ClSoftmaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SoftmaxDescriptor &	descriptor
	)

Definition at line 17 of file ClSoftmaxWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
 }

Referenced by ClLayerSupport::IsSoftmaxSupported().

◆ ClSpaceToBatchNdWorkloadValidate()

arm_compute::Status ClSpaceToBatchNdWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SpaceToBatchNdDescriptor &	descriptor
	)

Definition at line 16 of file ClSpaceToBatchNdWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::Status statusSpaceToBatch  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeInput  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
  
     arm_compute::TensorInfo aclReshapeInputInfo  = aclInputInfo;
     arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
  
     // When a spacial dimension is missing (rank=3) set W to 1
     const unsigned int rank = input.GetNumDimensions();
     if (rank == 3)
     {
         const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
         const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
  
         if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
         {
             // In ACL dimensions are right to left: C, W, H, N
             aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
             aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
         }
         else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
         {
             // In ACL dimensions are right to left: W, H, C, N
             aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
             aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
         }
         else
         {
             throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
         }
  
         statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
         statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
     }
  
     // ArmNN blockShape is [H, W] ACl asks for W, H
     int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
     int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
  
     unsigned int padLeft  = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
     unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
     arm_compute::Size2D paddingLeftTop     = BuildArmComputeSize2D(padLeft,
                                                                    descriptor.m_PadList[0].first);
     arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(padRight,
                                                                    descriptor.m_PadList[0].second);
  
     const arm_compute::Status aclStatus = arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
                                                                                      blockWidth,
                                                                                      blockHeight,
                                                                                      paddingLeftTop,
                                                                                      paddingRightBottom,
                                                                                      &aclOutputInfo);
  
     if (statusReshapeInput.error_code()  == arm_compute::ErrorCode::OK &&
         statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
         statusSpaceToBatch.error_code()  == arm_compute::ErrorCode::OK)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All SpaceToBatch layers validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "SpaceToBatch layer validate status failed."
                                    + statusSpaceToBatch.error_description()
                                    + statusReshapeInput.error_description()
                                    + statusReshapeOutput.error_description());
     }
 }

Referenced by ClLayerSupport::IsSpaceToBatchNdSupported().

◆ ClSpaceToDepthWorkloadValidate()

arm_compute::Status ClSpaceToDepthWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SpaceToDepthDescriptor &	descriptor
	)

Definition at line 54 of file ClSpaceToDepthWorkload.cpp.

 {
     DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
  
     int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
  
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
  
     const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo,
                                                                                      &aclOutputInfo,
                                                                                      blockSize);
     return aclStatus;
 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsSpaceToDepthSupported().

◆ ClSplitterWorkloadValidate()

arm_compute::Status ClSplitterWorkloadValidate	(	const TensorInfo &	input,
		const std::vector< std::reference_wrapper< TensorInfo >> &	outputs,
		unsigned int	splitAxis
	)

Definition at line 31 of file ClSplitterWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
  
     size_t numOutputs = outputs.size();
  
     std::vector<arm_compute::TensorInfo> aclOutputs;
     aclOutputs.reserve(numOutputs);
  
     std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
     aclOutputPtr.reserve(numOutputs);
  
     for (size_t i = 0u; i < outputs.size(); ++i)
     {
         aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
         aclOutputPtr.emplace_back(&aclOutputs.back());
     }
  
     unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
     return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
 }

Referenced by ClLayerSupport::IsSplitterSupported().

◆ ClSqrtWorkloadValidate()

arm_compute::Status ClSqrtWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 19 of file ClSqrtWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     ActivationDescriptor descriptor;
     descriptor.m_Function = ActivationFunction::Sqrt;
     const arm_compute::ActivationLayerInfo activationLayerInfo =
             ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
  
     return arm_compute::CLActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClStackWorkloadValidate()

arm_compute::Status ClStackWorkloadValidate	(	const std::vector< const TensorInfo * > &	inputs,
		const TensorInfo &	output,
		const StackDescriptor &	descriptor
	)

Definition at line 29 of file ClStackWorkload.cpp.

 {
     std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
     arm_compute::TensorInfo aclInputInfo;
     for (const TensorInfo* input : inputs)
     {
         aclInputInfo = BuildArmComputeTensorInfo(*input);
         aclInputPtrs.emplace_back(&aclInputInfo);
     }
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
  
     return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
 }

Referenced by ClLayerSupport::IsStackSupported().

◆ ClStridedSliceWorkloadValidate()

arm_compute::Status ClStridedSliceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const StridedSliceDescriptor &	descriptor
	)

Definition at line 27 of file ClStridedSliceWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
     arm_compute::Coordinates strides;
  
     std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride);
  
     auto numDimensions       = armnn::numeric_cast<int>(input.GetNumDimensions());
     int32_t begin_mask       = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
     int32_t end_mask         = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
     int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
  
     return arm_compute::CLStridedSlice::validate(&aclInputInfo,
                                         &aclOutputInfo,
                                         starts,
                                         ends,
                                         strides,
                                         begin_mask,
                                         end_mask,
                                         shrink_axis_mask);
 }

Referenced by ClLayerSupport::IsStridedSliceSupported().

◆ ClSubtractionValidate()

arm_compute::Status ClSubtractionValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 46 of file ClSubtractionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
                                                                                          &aclInput1Info,
                                                                                          &aclOutputInfo,
                                                                                          g_AclConvertPolicy,
                                                                                          activationInfo);
  
     return aclStatus;
 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsSubtractionSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClTensorHandleFactoryId()

constexpr const char* armnn::ClTensorHandleFactoryId ( )

constexpr

Definition at line 15 of file ClTensorHandleFactory.hpp.

 {
     // Fixed identifier string for the CL tensor handle factory.
     return "Arm/Cl/TensorHandleFactory";
 }

Referenced by ClTensorHandleFactory::GetIdStatic().

◆ ClTileWorkloadValidate()

arm_compute::Status ClTileWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TileDescriptor &	descriptor
	)

Definition at line 16 of file ClTileWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     std::vector<uint32_t> aclMultiples = descriptor.m_Multiples;
     std::reverse(aclMultiples.begin(),aclMultiples.end());
  
     return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples);
 }

Referenced by ClLayerSupport::IsTileSupported().

◆ ClTransposeConvolution2dWorkloadValidate()

arm_compute::Status ClTransposeConvolution2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TransposeConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 26 of file ClTransposeConvolution2dWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclOutputInfo  = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
  
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor);
  
     return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo,
                                                        &aclWeightsInfo,
                                                        optionalAclBiasesInfo,
                                                        &aclOutputInfo,
                                                        padStrideInfo);
 }

Referenced by ClLayerSupport::IsTransposeConvolution2dSupported().

◆ ClTransposeWorkloadValidate()

arm_compute::Status ClTransposeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TransposeDescriptor &	descriptor
	)

Definition at line 17 of file ClTransposeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
     const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
  
     return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
                                             armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
 }

Referenced by ClLayerSupport::IsTransposeSupported().

◆ ClUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const UnidirectionalSequenceLstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 508 of file ClUnidirectionalSequenceLstmFloatWorkload.cpp.

 {
     TensorShape inputLayerShape  = input.GetShape();
     TensorShape outputLayerShape = output.GetShape();
  
     if (inputLayerShape.GetNumDimensions() != 3)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
  
     unsigned int maxTime    = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
     unsigned int batchSize  = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
     unsigned int inputSize  = inputLayerShape[2];
     unsigned int outputSize = outputLayerShape[2];
  
     const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
     const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
  
     arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute1 status");
     arm_compute::Status statusSplit    = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Split status");
     arm_compute::Status statusLSTM     = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "LSTM status");
     arm_compute::Status statusConcat   = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Concat status");
     arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute2 status");
  
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     //
     // Permute validate
     //
     TensorInfo              permuteOutInfo    = armnnUtils::Permuted(input, { 1U, 0U, 2U });
     arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
     if (!descriptor.m_TimeMajor)
     {
         statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
                                                           &aclPermuteOutInfo,
                                                           arm_compute::PermutationVector(0U, 2U, 1U));
     }
  
     //
     // Split and Concat Tensors validate
     //
     std::vector<arm_compute::TensorInfo>         splitterOutputsTensorInfos;
     std::vector<arm_compute::TensorInfo>         concatInputsTensorInfos;
     std::vector<arm_compute::ITensorInfo*>       splitterOutputsTensorInfosPtr;
     std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
     splitterOutputsTensorInfos.reserve(maxTime);
     concatInputsTensorInfos.reserve(maxTime);
     for (unsigned int i = 0; i < maxTime; ++i)
     {
         arm_compute::TensorInfo splitter_out;
         arm_compute::TensorInfo concat_in;
  
         auto splitterTensorInfo = TensorInfo(input);
         auto concatTensorInfo   = TensorInfo(output);
         splitterTensorInfo.SetShape({batchSize, inputSize});
         concatTensorInfo.SetShape({batchSize, outputSize});
  
         arm_compute::TensorInfo aclSplitterTensorInfo
                                     = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
         arm_compute::TensorInfo aclConcatTensorInfo
                                     = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
  
         splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
         concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
         splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
         concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
     }
  
     //
     // Split validate
     //
     unsigned int numberDimensions = 3;
     unsigned int dimension        = 0; // splitting on 0-dimension (i.e. maxTime dimension)
     unsigned int aclAxisSplit     = CalcAclAxis(numberDimensions, dimension);
  
     if (maxTime != 1) // ACL split does not work with only one element to split.
     {
         if (!descriptor.m_TimeMajor)
         {
             statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
                                                          splitterOutputsTensorInfosPtr,
                                                          aclAxisSplit);
         }
         else
         {
             statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
         }
     }
  
     //
     // LSTM validate
     //
  
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
  
     unsigned int numUnits = cellStateIn.GetShape()[1];
     unsigned int scratchBufferFactor = 4;
  
     if (descriptor.m_CifgEnabled)
     {
         // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
        scratchBufferFactor = 3;
     }
  
     const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
  
     // The inputs and outputs
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclInputGateBiasInfo;
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
  
     if (!descriptor.m_CifgEnabled)
     {
         if (descriptor.m_PeepholeEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
                                          &aclRecurrentToInputWeightsInfo,
                                          descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
                                          &aclInputGateBiasInfo);
     }
  
     if (descriptor.m_ProjectionEnabled)
     {
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                                paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
     }
  
     if (descriptor.m_PeepholeEnabled)
     {
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
     }
  
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
                                                         &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
     }
  
     // Need to be set at negative threshold to be compatible for ACL
     float cell_threshold = descriptor.m_ClippingThresCell;
     float projection_threshold = descriptor.m_ClippingThresProj;
  
     arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
  
     for (unsigned int i = 0; i != maxTime; ++i)
     {
  
         // Set LSTM input and output ITensors depending on:
         // input format (timeMajor) & number of LSTM batches (maxTime).
         arm_compute::ITensorInfo* outputLSTM;
         arm_compute::ITensorInfo* inputLSTM;
         // If there is only one LSTM time major batch, we will not concat OR permute.
         // Set input of LSTM to be first input ITensor.
         // Set output of LSTM to be final output ITensor.
         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
         if (maxTime == 1 && !descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
             TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
             const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
             outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
         }
             // If there is only one LSTM batch major batch, we will not concat, only permute.
             // Set input of LSTM to be output of initial permute.
             // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
             // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
         else if (maxTime == 1 && !descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = &aclPermuteOutInfo;
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
             // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
         else
         {
             inputLSTM = splitterOutputsTensorInfosPtr[i];
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
  
         statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
                                                         &aclInputToForgetWeightsInfo,
                                                         &aclInputToCellWeightsInfo,
                                                         &aclInputToOutputWeightsInfo,
                                                         &aclRecurrentToForgetWeightsInfo,
                                                         &aclRecurrentToCellWeightsInfo,
                                                         &aclRecurrentToOutputWeightsInfo,
                                                         &aclForgetGateBiasInfo,
                                                         &aclCellBiasInfo,
                                                         &aclOutputGateBiasInfo,
                                                         &aclOutputStateInInfo,
                                                         &aclCellStateInInfo,
                                                         &aclScratchBufferInfo,
                                                         &aclOutputStateOutInfo,
                                                         &aclCellStateOutInfo,
                                                         outputLSTM,
                                                         lstm_params_info,
                                                         activationLayerInfo,
                                                         cell_threshold,
                                                         projection_threshold);
  
         if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
         {
             break;
         }
     }
  
     //
     // Concat validate
     //
  
     // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
     TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
  
     TensorInfo concatOuputTensorInfo = TensorInfo(output);
     concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
     arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
  
     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
     {
         for (unsigned int i = 0; i < maxTime; ++i)
         {
             auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
             concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
         }
  
         unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
         if (!descriptor.m_TimeMajor)
         {
             statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclConcatOuputTensorInfo,
                                                                      aclAxisConcat);
         }
         else
         {
             statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclOutputInfo,
                                                                      aclAxisConcat);
         }
     }
     // If only one LSTM batch, we do not concat and/or permute.
     // Must ensure final output info is expanded to correct batch major dimensions.
     else
     {
         if (!descriptor.m_TimeMajor)
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandBatchMajor));
         }
         else
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }
     }
     //
     // Permute validate
     //
     if (!descriptor.m_TimeMajor)
     {
         // Output now time major. Permute output back to batch major.
         if (maxTime != 1)
         {
             statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
         else
         {
             statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
     }
  
     auto okCode = arm_compute::ErrorCode::OK;
     if (statusPermute1.error_code() == okCode &&
         statusSplit.error_code()    == okCode &&
         statusLSTM .error_code()    == okCode &&
         statusConcat.error_code()   == okCode &&
         statusPermute2.error_code() == okCode)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All Unidirectional Sequence LSTM layer validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by ClLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ CollapseLeadingUnitDimensions()

bool armnn::CollapseLeadingUnitDimensions	(	const TensorInfo &	in,
		TensorInfo &	out
	)

inline

Definition at line 14 of file NeonBackendOptimizationUtils.hpp.

 {
     unsigned int numDimensions = in.GetNumDimensions();
     for (unsigned int i = 0; i < (numDimensions-1); ++i)
     {
         if (in.GetShape()[i] != 1)
         {
             return false;
         }
     }
  
     unsigned int w = in.GetShape()[numDimensions-1];
     out = in;
     out.SetShape({w});
  
     return true;
 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), and TensorInfo::SetShape().

Referenced by BuildAddMulAddTensorInfoLists().

◆ Combine() [1/2]

MemorySourceFlags armnn::Combine	(	Arg	source,
		Args...	rest
	)

Definition at line 36 of file MemorySources.hpp.

 {
     return static_cast<MemorySourceFlags>(source) | Combine(rest...);
 }

References Combine().

◆ Combine() [2/2]

MemorySourceFlags armnn::Combine	(	Arg	sourceA,
		Arg	sourceB
	)

Definition at line 30 of file MemorySources.hpp.

 {
     return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
 }

Referenced by Combine().

◆ ComputeAclAxis()

int armnn::ComputeAclAxis	(	const int &	armnnAxis,
		const armnn::TensorInfo &	tensor
	)

inline

Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)

Definition at line 273 of file ArmComputeUtils.hpp.

 {
     int rank = static_cast<int>(tensor.GetNumDimensions());
  
     ARMNN_ASSERT(rank != 0);
     ARMNN_ASSERT((-1 * rank) <= armnnAxis);
     ARMNN_ASSERT(armnnAxis < rank);
  
     int sign = (armnnAxis < 0) ? -1 : 1;
     int aclAxis = sign * rank - 1  - armnnAxis;
  
     return aclAxis;
 }

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

Referenced by ClGatherWorkload::ClGatherWorkload(), ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(), ClSoftmaxWorkload::ClSoftmaxWorkload(), NeonGatherWorkload::NeonGatherWorkload(), NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(), and NeonSoftmaxWorkload::NeonSoftmaxWorkload().

◆ ComputeConv3DInfo() [1/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo	(	const armnn::Convolution3dDescriptor	descriptor,
		bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

inline

Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.

Definition at line 288 of file ArmComputeUtils.hpp.

 {
     const arm_compute::Size3D    stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
     const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
                                          descriptor.m_PadTop, descriptor.m_PadBottom,
                                          descriptor.m_PadFront, descriptor.m_PadBack};
     const arm_compute::Size3D    dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
  
     const arm_compute::ActivationLayerInfo activationInfo =
             ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
     const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
  
     return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
 }

◆ ComputeConv3DInfo() [2/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo	(	const armnn::Convolution3dQueueDescriptor	queueDescriptor,
		bool	isFastMathEnabled
	)

inline

Definition at line 305 of file ArmComputeUtils.hpp.

 {
     auto descriptor = queueDescriptor.m_Parameters;
     const arm_compute::Size3D    stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
     const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
                                          descriptor.m_PadTop, descriptor.m_PadBottom,
                                          descriptor.m_PadFront, descriptor.m_PadBack};
     const arm_compute::Size3D    dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
  
     const arm_compute::ActivationLayerInfo activationInfo =
             ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
     const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
  
     return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
 }

References ConvertAdditionalInfoToAclActivationLayerInfo(), QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Convolution3dDescriptor::m_StrideX.

◆ ComputeReductionTensorShape()

const TensorInfo armnn::ComputeReductionTensorShape	(	const armnn::TensorInfo &	input,
		const std::vector< uint32_t > &	vAxis,
		const bool	keepDims
	)

inline

Function to compute the output tensor shape based on the axes and if keepDims is set.

Definition at line 347 of file ArmComputeUtils.hpp.

 {
     // Work on a copy of the input info; only its shape is replaced before it is returned.
     auto reducedTensorInfo = input;
     unsigned int rank = reducedTensorInfo.GetNumDimensions();
     unsigned int outputRank = 0;
     // Calculate output dimension
     if (keepDims)
     {
         // Reduced dimensions are kept (with size 1), so the rank does not change.
         outputRank = rank;
     }
     else if (vAxis.empty())
     {
         // No axes listed and keepDims is false: the result has a single dimension.
         outputRank = 1;
     }
     else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
     {
         throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
     }
     else
     {
         // One dimension is dropped per listed axis, but the rank never goes below 1.
         outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
         if (outputRank == 0)
         {
             outputRank = 1;
         }
     }
     // Every output dimension starts at 1; with an empty vAxis the shape stays all ones.
     std::vector<unsigned int> dimSizes(outputRank, 1);
     if (!vAxis.empty())
     {
         // Skip the dimension that has been reduced unless keepDims is true.
         // NOTE(review): outputRank above counts vAxis.size() entries, so this loop assumes vAxis
         // holds distinct, in-range axes; with duplicates (and keepDims false) more entries would be
         // written than dimSizes holds - confirm callers guarantee this.
         unsigned int outputIndex = 0;
         for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
         {
             if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
             {
                 // Dimension i is not reduced: carry its size over.
                 dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
                 ++outputIndex;
             }
             else if (keepDims)
             {
                 // Dimension i is reduced but kept, so it has size 1.
                 dimSizes[outputIndex] = 1;
                 ++outputIndex;
             }
         }
     }
     const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
     reducedTensorInfo.SetShape(inferredShape);
     return reducedTensorInfo;
 }

References TensorInfo::GetNumDimensions().

Referenced by ChainReduceLayers().

◆ ComputeSoftmaxAclAxis()

T armnn::ComputeSoftmaxAclAxis	(	const SoftmaxDescriptor &	softmaxDesc,
		const armnn::TensorInfo &	tensor
	)

inline

Definition at line 227 of file ArmComputeUtils.hpp.

 {
     // Detect the Android default value of -1 and return the ACL default value of 0.
     if (softmaxDesc.m_Axis == -1)
     {
         return 0;
     }
  
     unsigned int dim = tensor.GetNumDimensions();
  
     ARMNN_ASSERT(dim != 0);
  
     // Currently ArmNN support axis 1.
     auto aclAxis = (static_cast<T>(dim) - 1);
     aclAxis = aclAxis > 0 ? aclAxis -1 : aclAxis;
  
     return aclAxis;
 }

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), and SoftmaxDescriptor::m_Axis.

◆ ComputeSplitAxis()

std::set<unsigned int> armnn::ComputeSplitAxis	(	const armnn::SplitterDescriptor &	desc,
		const TensorShape &	input
	)

inline

Definition at line 246 of file ArmComputeUtils.hpp.

 {
     unsigned int numSplit = desc.GetNumViews();
     unsigned int numDimensions = desc.GetNumDimensions();
     std::set<unsigned int> splitAxis;
  
     if (desc.HasAxis())
     {
         splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
     }
     else
     {
         for (unsigned int i = 0; i < numSplit; ++i)
         {
             for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
             {
                 if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
                 {
                     splitAxis.insert(dimIdx);
                 }
             }
         }
     }
     return splitAxis;
 }

References ViewsDescriptor::GetAxis(), ViewsDescriptor::GetNumDimensions(), ViewsDescriptor::GetNumViews(), armnnUtils::GetUnsignedAxis(), ViewsDescriptor::GetViewSizes(), and ViewsDescriptor::HasAxis().

Referenced by ClSplitterWorkload::ClSplitterWorkload(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), and NeonSplitterWorkload::NeonSplitterWorkload().

◆ Concatenate()

void Concatenate	(	const ConcatQueueDescriptor &	data,
		std::vector< ITensorHandle * >	inputs,
		std::vector< ITensorHandle * >	outputs
	)

Definition at line 14 of file Concatenate.cpp.

 {
     // Fills outputs[0] element by element: for each output element, find the first input view
     // that contains its coordinates and copy the matching input element across as float.
     const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
  
     std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
     Encoder<float>& encoder = *encoderPtr;
  
     for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
     {
         // Coordinates of the current output element, one entry per dimension.
         unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
  
         // Decompose the flat index into per-dimension coordinates, first dimension outermost.
         unsigned int indexRemainder = index;
         unsigned int dimensionStride = outputInfo0.GetNumElements();
  
         for (unsigned int i = 0; i < outputInfo0.GetNumDimensions(); i++)
         {
             dimensionStride /= outputInfo0.GetShape()[i];
             indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
             indexRemainder -= indices[i] * dimensionStride;
         }
  
         for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
         {
             ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
  
             //Split view extents are defined by the size of (the corresponding) input tensor.
             const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
             ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
  
             // Check all dimensions to see if this element is inside the given input view.
             bool insideView = true;
             for (unsigned int i = 0; i < inputInfo.GetNumDimensions(); i++)
             {
                 if (indices[i] < view.m_Origin[i])
                 {
                     insideView = false;
                 }
                 if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
                 {
                     insideView = false;
                 }
             }
  
             if (insideView)
             {
                 // A decoder is created (and the input mapped) for each matching output element.
                 std::unique_ptr<Decoder<float>> decoderPtr =
                     MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
                 Decoder<float>& decoder = *decoderPtr;
                 unsigned int inIndex = 0;
                 // Note: this shadows the outer dimensionStride; here it is the input tensor's stride.
                 unsigned int dimensionStride = 1;
  
                 // Build the flat input index from the view-relative coordinates, last dimension first.
                 for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
                 {
                     inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
                     dimensionStride *= inputInfo.GetShape()[i];
                 }
                 decoder += inIndex;
                 encoder.Set(decoder.Get());
  
                 //What should we do if input views overlap on the output tensor?
                 //We could error, take the average, or something else...
                 //For now just stop after finding first view (input) that matches.
                 break;
             }
         }
         // Advance to the next output element, even if no view covered this one (nothing is written then).
         ++encoder;
     }
 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), ConcatQueueDescriptor::ViewOrigin::m_Origin, ConcatQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().

◆ ConditionalThrow() [1/2]

void armnn::ConditionalThrow ( bool condition )

Definition at line 174 of file Exceptions.hpp.

 {
     if (!condition)
     {
         throw ExceptionType();
     }
 }

◆ ConditionalThrow() [2/2]

void armnn::ConditionalThrow	(	bool	condition,
		const std::string &	message
	)

Definition at line 165 of file Exceptions.hpp.

 {
     if (!condition)
     {
         throw ExceptionType(message);
     }
 }

◆ ConditionalThrowIfNotEqual()

void armnn::ConditionalThrowIfNotEqual	(	const std::string &	message,
		const ComparedType &	leftHandSide,
		const ComparedType &	rightHandSide
	)

ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&).

Definition at line 189 of file Exceptions.hpp.

 {
     if (!(leftHandSide == rightHandSide))
     {
         std::stringstream ss;
         ss << message << " : " << leftHandSide << " != " << rightHandSide;
         throw ExceptionType(ss.str());
     }
 }

◆ ConfigureDetailsObject()

void armnn::ConfigureDetailsObject	(	JsonChildObject &	detailsObject,
		std::string	layerDetailsStr
	)

Definition at line 295 of file Profiling.cpp.

 {
     // Mark the JSON child object as an ExecObjectDesc entry, then hand it the layer
     // details string to store and parse.
     detailsObject.SetType(JsonObjectType::ExecObjectDesc);
     detailsObject.SetAndParseDetails(layerDetailsStr);
  
 }

References ExecObjectDesc, JsonChildObject::SetAndParseDetails(), and JsonChildObject::SetType().

◆ ConfigureLogging()

void ConfigureLogging	(	bool	printToStandardOutput,
		bool	printToDebugOutput,
		LogSeverity	severity
	)

Configures the logging behaviour of the ARMNN library.

printToStandardOutput: Set to true if log messages should be printed to the standard output. printToDebugOutput: Set to true if log messages should be printed to a platform-specific debug output (where supported). severity: All log messages that are at this severity level or higher will be printed; others will be ignored.

Examples: AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, and SimpleSample.cpp.

Definition at line 18 of file Utils.cpp.

 {
     // Enable or disable the standard-output and debug-output sinks as requested; the third
     // sink flag is always passed as false here.
     SetAllLoggingSinks(printToStandardOutput, printToDebugOutput, false);
     // Apply the severity filter to logging.
     SetLogFilter(severity);
 }

References SetAllLoggingSinks(), and SetLogFilter().

Referenced by ArmnnDevice::ArmnnDevice(), ConfigureLoggingTest(), and main().

◆ ConfigureTuner()

void armnn::ConfigureTuner	(	arm_compute::CLTuner &	tuner,
		TuningLevel	level
	)

inline

Definition at line 44 of file ArmComputeTuningUtils.hpp.

 {
     tuner.set_tune_new_kernels(true); // Turn on tuning initially.
  
     switch (level)
     {
         case TuningLevel::Rapid:
             ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
             tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
             break;
         case TuningLevel::Normal:
             ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
             tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
             break;
         case TuningLevel::Exhaustive:
             ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
             tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
             break;
         case TuningLevel::None:
         default:
             tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
             break;
     }
 }

References ARMNN_LOG, Exhaustive, info, None, Normal, and Rapid.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ ConnectedToLayerType()

bool armnn::ConnectedToLayerType	(	Layer *	baseLayer,
		LayerType	layerType,
		unsigned int	dimSize = `0`
	)

inline

Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType.

If dimSize is provided, the function also checks that the connecting Tensor has more than that number of dimensions.

Definition at line 271 of file SubgraphUtils.hpp.

 {
     // Returns true if the layer feeding input slot 0, or any layer consuming output slot 0,
     // has type layerType and the connecting tensor has more than dimSize dimensions.
     // NOTE(review): GetConnectedOutputSlot() is dereferenced without a null check, so this
     // appears to assume input slot 0 is connected - confirm against callers.
     Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
     TensorInfo parentTensorInfo = baseLayer->GetInputSlot(0).GetTensorInfo();
  
     // The comparison is strict: the tensor must have MORE than dimSize dimensions.
     if (parentTensorInfo.GetNumDimensions() > dimSize && parentLayer.GetType() == layerType)
     {
         return true;
     }
     // Only output slot 0 is inspected; each of its connections is checked in turn.
     for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
     {
         Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
         TensorInfo nextTensorInfo = baseLayer->GetOutputSlot(0).GetConnection(i)->GetTensorInfo();
  
         if (nextTensorInfo.GetNumDimensions() > dimSize && nextLayer.GetType() == layerType)
         {
             return true;
         }
     }
     return false;
 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), TensorInfo::GetNumDimensions(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), and Layer::GetType().

◆ ConnectedToLayerWithNCHW()

bool armnn::ConnectedToLayerWithNCHW ( Layer * baseLayer )

inline

Checks if the Layer is connected to any Layer that has an NCHW layout.

Definition at line 250 of file SubgraphUtils.hpp.

 {
     // Returns true if IsNCHW() holds for the layer feeding input slot 0 or for any layer
     // connected to output slot 0; returns false otherwise.
     // NOTE(review): GetConnectedOutputSlot() is dereferenced without a null check, so this
     // appears to assume input slot 0 is connected - confirm against callers.
     Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
  
     if (IsNCHW(parentLayer))
     {
         return true;
     }
     // Only output slot 0 is inspected; each of its connections is checked in turn.
     for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
     {
         Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
         if (IsNCHW(nextLayer))
         {
             return true;
         }
     }
     return false;
 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), and IsNCHW().

Referenced by NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ Convert1HWOTensorInfoToAcl()

std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl	(	const TensorInfo &	weightInfo,
		const TensorInfo &	inputInfo,
		const DataLayout	dataLayout
	)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier.

Definition at line 176 of file WorkloadUtils.cpp.

 {
     // Builds the weights TensorInfo for the given data layout and derives the depth
     // multiplier as (weights dimension 3) / (input channel dimension), using integer division.
     // Throws InvalidArgumentException for any data layout other than NHWC or NCHW.
     // NOTE(review): the input channel dimension is used as a divisor without a zero check.
     unsigned int aclDepthMultiplier = 1;
     TensorInfo weightsPermuted;
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         // No permutation required. Input and weights data layouts are the same.
         // Input channels are read from dimension 3 for NHWC.
         aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
         weightsPermuted = weightInfo;
     }
  
     else if (dataLayout == armnn::DataLayout::NCHW)
     {
         // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
         // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
         // Input channels are read from dimension 1 for NCHW.
         aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
         PermutationVector permutationVector{ 0, 2, 3, 1 };
         weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
     }
     else
     {
         throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
                                                    GetDataLayoutName(dataLayout)));
     }
  
     return std::make_tuple(weightsPermuted, aclDepthMultiplier);
 }

References GetDataLayoutName(), TensorInfo::GetShape(), NCHW, NHWC, and armnnUtils::Permuted().

◆ Convert1HWOTensorToAcl()

std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl	(	const ConstTensorHandle *	weightTensor,
		const TensorInfo &	inputInfo,
		const DataLayout	dataLayout,
		void *	permuteBuffer
	)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.

Parameters

weightTensor	- ConstTensorHandle of weights tensor
inputInfo	- TensorInfo of input tensor
dataLayout	- DataLayout of the input tensor
permuteBuffer	- Pointer to memory with the size of tensor. Used for the permutation

Returns: tuple of transformed weights-ConstTensor and depthwise multiplier

Definition at line 145 of file WorkloadUtils.cpp.

 {
     // Chooses a permutation for the weights based on the data layout, derives the depth
     // multiplier as (weights dimension 3) / (input channel dimension) using integer division,
     // and hands the weights, the permutation and permuteBuffer to PermuteTensor().
     // Throws InvalidArgumentException for any data layout other than NHWC or NCHW.
     // NOTE(review): weightTensor is dereferenced without a null check, and the input channel
     // dimension is used as a divisor without a zero check.
     TensorInfo weightsInfo = weightTensor->GetTensorInfo();
     unsigned int depthMultiplier = 1;
     // Left empty for NHWC, where no permutation is required.
     PermutationVector permutationVector{};
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         // No permutation required. Data layouts are the same.
  
         depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
     }
     else if (dataLayout == armnn::DataLayout::NCHW)
     {
         // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
         depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
         permutationVector = { 0, 2, 3, 1 };
     }
     else
     {
         throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
                                                    GetDataLayoutName(dataLayout)));
     }
  
     ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
  
     return std::make_tuple(weightsPermuted, depthMultiplier);
 }

References GetDataLayoutName(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, and PermuteTensor().

◆ Convert1HWOtoMIHW()

std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW	(	const ConstTensorHandle *	weightTensor,
		const TensorInfo &	inputInfo,
		const DataLayout &	dataLayout,
		void *	permuteBuffer
	)

Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].

Parameters

weightTensor	- ConstTensorHandle of the weight tensor that should be converted
inputInfo	- TensorInfo of the corresponding input tensor
dataLayout	- DataLayout of the input tensor e.g. NHWC or NCHW
permuteBuffer	- Memory location with the same size as the weight tensor to write converted data to

Returns: - A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier

Definition at line 207 of file WorkloadUtils.cpp.

 {
     // Rejects per-axis quantized weights, derives the depth multiplier from the weights'
     // last dimension and the input's channel dimension, then permutes the weights with
     // the vector { 2, 3, 1, 0 } via PermuteTensor().
     // NOTE(review): weightTensor is dereferenced without a null check.
     TensorInfo weightsInfo = weightTensor->GetTensorInfo();
  
     if (weightsInfo.HasPerAxisQuantization())
     {
         throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
                                        "quantization is applied.");
     }
  
     // Reshape weights  [ 1, H, W, I*M ] --> [ H, W, I, M ]
     auto weightsShape = weightsInfo.GetShape();
     // The channel dimension index of the input depends on the data layout.
     auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
     // Integer division; NOTE(review): the input channel count is not checked for zero.
     unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
     // NOTE(review): weightsInfo is a local copy and is not passed to PermuteTensor() below,
     // so in the code visible here this reshape does not reach the returned tensor - confirm intent.
     weightsInfo.SetShape({ weightsShape[1],
                            weightsShape[2],
                            inputInfo.GetShape()[channelIndex],
                            depthMultiplier});
  
     // Permute [ H, W, I, M ] --> [ M, I, H, W ]
     PermutationVector permutationVector = { 2, 3, 1, 0 };
     ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
  
     return std::make_tuple(weightsPermuted, depthMultiplier);
 }

References DataLayoutIndexed::GetChannelsIndex(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), TensorInfo::HasPerAxisQuantization(), PermuteTensor(), and TensorInfo::SetShape().

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor & actDesc )

inline

Definition at line 87 of file ArmComputeUtils.hpp.

 {
     // Builds an ACL ActivationLayerInfo from the descriptor: the activation function is
     // translated to its ACL equivalent and m_A / m_B are forwarded unchanged as the
     // second and third constructor arguments.
     return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function),
         actDesc.m_A, actDesc.m_B);
 }

References ConvertActivationFunctionToAclActivationFunction(), ActivationDescriptor::m_A, ActivationDescriptor::m_B, and ActivationDescriptor::m_Function.

Referenced by ClActivationWorkload::ClActivationWorkload(), ClSqrtWorkload::ClSqrtWorkload(), ComputeConv3DInfo(), ConvertActivationDescriptorToAclActivationLayerInfo(), ConvertAdditionalInfoToAclActivationLayerInfo(), ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(), NeonActivationWorkload::NeonActivationWorkload(), and NeonSqrtWorkload::NeonSqrtWorkload().

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor * activationDescPtr )

inline

Definition at line 94 of file ArmComputeUtils.hpp.

 {
     // Pointer overload: converts the pointed-to descriptor when one is supplied,
     // otherwise returns a default-constructed ActivationLayerInfo.
     if (activationDescPtr != nullptr)
     {
         // The static_cast produces an ActivationDescriptor value, which selects the
         // by-reference overload rather than recursing into this one.
         return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
                                                                            *activationDescPtr));
     }
     return arm_compute::ActivationLayerInfo();
 }

References ConvertActivationDescriptorToAclActivationLayerInfo().

◆ ConvertActivationFunctionToAclActivationFunction()

arm_compute::ActivationLayerInfo::ActivationFunction armnn::ConvertActivationFunctionToAclActivationFunction ( ActivationFunction armnnFunction )

inline

Definition at line 62 of file ArmComputeUtils.hpp.

 {
     // Maps an armnn ActivationFunction to the corresponding ACL ActivationFunction.
     // Any value not listed below throws InvalidArgumentException.
     using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
  
     switch (armnnFunction)
     {
         case ActivationFunction::Linear:        return AclActivationFunction::LINEAR;
         // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
         case ActivationFunction::Sigmoid:       return AclActivationFunction::LOGISTIC;
         case ActivationFunction::ReLu:          return AclActivationFunction::RELU;
         // Note that BoundedReLu maps to LU_BOUNDED_RELU, not BOUNDED_RELU.
         case ActivationFunction::BoundedReLu:   return AclActivationFunction::LU_BOUNDED_RELU;
         case ActivationFunction::SoftReLu:      return AclActivationFunction::SOFT_RELU;
         case ActivationFunction::LeakyReLu:     return AclActivationFunction::LEAKY_RELU;
         case ActivationFunction::Abs:           return AclActivationFunction::ABS;
         case ActivationFunction::Sqrt:          return AclActivationFunction::SQRT;
         case ActivationFunction::Square:        return AclActivationFunction::SQUARE;
         case ActivationFunction::TanH:          return AclActivationFunction::TANH;
         case ActivationFunction::Elu:           return AclActivationFunction::ELU;
         case ActivationFunction::HardSwish:     return AclActivationFunction::HARD_SWISH;
         case ActivationFunction::Gelu:          return AclActivationFunction::GELU;
         default:                                throw InvalidArgumentException("Unsupported activation function");
     }
 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by ConvertActivationDescriptorToAclActivationLayerInfo().

◆ ConvertAdditionalInfoToAclActivationLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertAdditionalInfoToAclActivationLayerInfo ( const QueueDescriptor & queueDescriptor )

inline

Definition at line 105 of file ArmComputeUtils.hpp.

 {
     // Looks up an ActivationDescriptor in the queue descriptor's additional information
     // and converts it; when none is present (null pointer) a default-constructed
     // ActivationLayerInfo is returned instead.
     const ActivationDescriptor* activationDescPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
  
     if (activationDescPtr != nullptr)
     {
         return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
                 *activationDescPtr));
     }
     return arm_compute::ActivationLayerInfo();
 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), and QueueDescriptor::GetAdditionalInformation().

Referenced by ClAdditionWorkload::ClAdditionWorkload(), ClBatchMatMulWorkload::ClBatchMatMulWorkload(), ClDivisionWorkload::ClDivisionWorkload(), ClElementwiseBinaryWorkload::ClElementwiseBinaryWorkload(), ClFullyConnectedWorkload::ClFullyConnectedWorkload(), ClMultiplicationWorkload::ClMultiplicationWorkload(), ClSubtractionWorkload::ClSubtractionWorkload(), ComputeConv3DInfo(), NeonAdditionWorkload::NeonAdditionWorkload(), NeonBatchMatMulWorkload::NeonBatchMatMulWorkload(), NeonDivisionWorkload::NeonDivisionWorkload(), NeonElementwiseBinaryWorkload::NeonElementwiseBinaryWorkload(), NeonFusedWorkload::NeonFusedWorkload(), NeonMultiplicationWorkload::NeonMultiplicationWorkload(), and NeonSubtractionWorkload::NeonSubtractionWorkload().

◆ ConvertComparisonOperationToAcl()

arm_compute::ComparisonOperation armnn::ConvertComparisonOperationToAcl ( const ComparisonDescriptor & descriptor )

inline

Definition at line 141 of file ArmComputeUtils.hpp.

 {
     // Maps the descriptor's m_Operation to the corresponding ACL ComparisonOperation.
     // Any value not listed below throws InvalidArgumentException.
     switch (descriptor.m_Operation)
     {
         case ComparisonOperation::Greater:         return arm_compute::ComparisonOperation::Greater;
         case ComparisonOperation::GreaterOrEqual:  return arm_compute::ComparisonOperation::GreaterEqual;
         case ComparisonOperation::Less:            return arm_compute::ComparisonOperation::Less;
         case ComparisonOperation::LessOrEqual:     return arm_compute::ComparisonOperation::LessEqual;
         case ComparisonOperation::Equal:           return arm_compute::ComparisonOperation::Equal;
         case ComparisonOperation::NotEqual:        return arm_compute::ComparisonOperation::NotEqual;
         default:                                   throw InvalidArgumentException("Unsupported comparison function");
     }
 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, ComparisonDescriptor::m_Operation, and NotEqual.

Referenced by ClComparisonWorkload::ClComparisonWorkload(), and NeonComparisonWorkload::NeonComparisonWorkload().

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [1/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo	(	const FullyConnectedDescriptor &	fullyConnectedDesc,
		arm_compute::ActivationLayerInfo	activationLayerInfo
	)

inline

Definition at line 204 of file ArmComputeUtils.hpp.

 {
     // Builds an ACL FullyConnectedLayerInfo from the descriptor's transpose flag and an
     // already-converted ACL activation info; all other fields keep their default values.
     arm_compute::FullyConnectedLayerInfo fc_info;
     fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
     fc_info.activation_info = activationLayerInfo;
     return fc_info;
 }

References FullyConnectedDescriptor::m_TransposeWeightMatrix.

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [2/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo	(	const FullyConnectedDescriptor &	fullyConnectedDesc,
		const ActivationDescriptor *	activationDesc
	)

inline

Definition at line 194 of file ArmComputeUtils.hpp.

 {
     // Builds an ACL FullyConnectedLayerInfo from the descriptor's transpose flag and an
     // armnn activation descriptor pointer; all other fields keep their default values.
     arm_compute::FullyConnectedLayerInfo fc_info;
     fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
     // The pointer is forwarded as-is to the conversion helper.
     fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
     return fc_info;
 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), and FullyConnectedDescriptor::m_TransposeWeightMatrix.

Referenced by ClFullyConnectedWorkload::ClFullyConnectedWorkload().

◆ ConvertLogSeverity()

constexpr LogSeverity armnn::ConvertLogSeverity ( BoostLogSeverityMapping severity )

constexpr

Definition at line 206 of file Logging.hpp.

 {
     // Plain value-preserving cast from BoostLogSeverityMapping to LogSeverity; no range check is done.
     // NOTE(review): presumably the two enums share underlying values - confirm in Logging.hpp.
     return static_cast<LogSeverity>(severity);
 }

◆ ConvertLstmActivationFuncToAclLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertLstmActivationFuncToAclLayerInfo ( uint32_t activationFunction )

inline

Definition at line 118 of file ArmComputeUtils.hpp.

 {
     // Translates a numeric LSTM activation code into an ACL ActivationLayerInfo.
     // Handled codes: 0, 1, 3, 4 and 6; every other value (including 2 and 5) throws armnn::Exception.
     // For preparing the object for the class ActivationLayerInfo, we need to consider 5 situations.
     switch (activationFunction)
     {
         case 0:
             return arm_compute::ActivationLayerInfo(); // no activation, do nothing
         case 1:
             return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
         case 3:
             // BOUNDED_RELU with 6.0 as its first parameter.
             return arm_compute::ActivationLayerInfo(
                 arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
         case 4:
             // TANH with both parameters set to 1.0.
             return arm_compute::ActivationLayerInfo(
                 arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
         case 6:
             return arm_compute::ActivationLayerInfo(
                 arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
         default:
             throw armnn::Exception("Wrong Type of Activation Function!");
     }
 }

◆ ConvertMaskToACLFormat()

int32_t ConvertMaskToACLFormat	(	int32_t	mask,
		int32_t	numDim
	)

Definition at line 298 of file WorkloadUtils.cpp.

 {
     // Reverses the order of the lowest numDim bits of mask: bit i of the input ends up at
     // bit position numDim - (i + 1) of the result. Bits at or above numDim are ignored.
     int32_t reversedMask = 0;
     for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
     {
         // Check if bit set in mask for each dimension
         // ('<<' binds tighter than '&', so this tests mask & (1 << i); bit is 0 or 1.)
         int32_t bit = (mask & 1 << i) != 0;
         // Increment the new mask with the bits reversed
         // (std::max clamps the shift amount so it can never be negative.)
         reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
     }
  
     return reversedMask;
 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload(), and NeonStridedSliceWorkload::NeonStridedSliceWorkload().

◆ ConvertNormalizationAlgorithmChannelToAclNormType()

arm_compute::NormType armnn::ConvertNormalizationAlgorithmChannelToAclNormType ( NormalizationAlgorithmChannel channelType )

inline

Definition at line 182 of file ArmComputeUtils.hpp.

 {
     // Maps the armnn normalization channel type to an ACL NormType:
     // Across -> CROSS_MAP, Within -> IN_MAP_2D. Any other value throws InvalidArgumentException.
     using arm_compute::NormType;
     switch (channelType)
     {
         case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
         case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
         default:    throw InvalidArgumentException("Unsupported normalization algorithm channel type");
     }
 }

References Across, and Within.

◆ ConvertOutputShapeRoundingToAclDimensionRoundingType()

arm_compute::DimensionRoundingType armnn::ConvertOutputShapeRoundingToAclDimensionRoundingType ( OutputShapeRounding rounding )

inline

Definition at line 168 of file ArmComputeUtils.hpp.

 {
     // Maps the armnn output-shape rounding mode to an ACL DimensionRoundingType:
     // Ceiling -> CEIL, Floor -> FLOOR. Any other value throws InvalidArgumentException.
     using arm_compute::DimensionRoundingType;
  
     switch (rounding)
     {
         case OutputShapeRounding::Ceiling:  return DimensionRoundingType::CEIL;
         case OutputShapeRounding::Floor:    return DimensionRoundingType::FLOOR;
         default:                            throw InvalidArgumentException("Unsupported Output Shape Rounding type");
     }
 }

References Ceiling, and Floor.

◆ ConvertPaddingModeToAcl()

arm_compute::PaddingMode armnn::ConvertPaddingModeToAcl ( const PaddingMode & paddingMode )

inline

Definition at line 322 of file ArmComputeUtils.hpp.

 {
     // Maps the armnn PaddingMode to the ACL PaddingMode of the same name
     // (Constant, Reflect, Symmetric). Any other value throws InvalidArgumentException.
     switch (paddingMode)
     {
         case PaddingMode::Constant:   return arm_compute::PaddingMode::CONSTANT;
         case PaddingMode::Reflect:    return arm_compute::PaddingMode::REFLECT;
         case PaddingMode::Symmetric:  return arm_compute::PaddingMode::SYMMETRIC;
         default:                      throw InvalidArgumentException("Unsupported Padding Mode");
     }
 }

References Constant, Reflect, and Symmetric.

◆ ConvertPoolingAlgorithmToAclPoolingType()

arm_compute::PoolingType armnn::ConvertPoolingAlgorithmToAclPoolingType ( PoolingAlgorithm poolingAlgorithm )

inline

Definition at line 155 of file ArmComputeUtils.hpp.

 {
     // Maps the armnn PoolingAlgorithm to an ACL PoolingType:
     // Max -> MAX, Average -> AVG, L2 -> L2. Any other value throws InvalidArgumentException.
     using arm_compute::PoolingType;
  
     switch (poolingAlgorithm)
     {
         case PoolingAlgorithm::Max:             return PoolingType::MAX;
         case PoolingAlgorithm::Average:         return PoolingType::AVG;
         case PoolingAlgorithm::L2:              return PoolingType::L2;
         default:                                throw InvalidArgumentException("Unsupported pooling algorithm");
     }
 }

References Average, L2, and Max.

Referenced by CreatePool2dAttributes().

◆ ConvertReductionOperationToAcl()

arm_compute::ReductionOperation armnn::ConvertReductionOperationToAcl ( const ReduceDescriptor & descriptor )

inline

Definition at line 333 of file ArmComputeUtils.hpp.

 {
     // Maps the descriptor's m_ReduceOperation to an ACL ReductionOperation.
     // Any value not listed below throws InvalidArgumentException.
     switch (descriptor.m_ReduceOperation)
     {
         case ReduceOperation::Sum:    return arm_compute::ReductionOperation::SUM;
         // Mean is expressed through ACL's MEAN_SUM operation.
         case ReduceOperation::Mean:   return arm_compute::ReductionOperation::MEAN_SUM;
         case ReduceOperation::Max:    return arm_compute::ReductionOperation::MAX;
         case ReduceOperation::Min:    return arm_compute::ReductionOperation::MIN;
         case ReduceOperation::Prod:   return arm_compute::ReductionOperation::PROD;
         default:                      throw InvalidArgumentException("Unsupported Reduction operation");
     }
 }

References ReduceDescriptor::m_ReduceOperation, Max, Mean, Min, Prod, and Sum.

◆ ConvertResizeMethodToAclInterpolationPolicy()

arm_compute::InterpolationPolicy armnn::ConvertResizeMethodToAclInterpolationPolicy ( ResizeMethod resizeMethod )

inline

Definition at line 213 of file ArmComputeUtils.hpp.

 {
     // Maps the armnn ResizeMethod to an ACL InterpolationPolicy:
     // Bilinear -> BILINEAR, NearestNeighbor -> NEAREST_NEIGHBOR.
     // Any other value throws InvalidArgumentException.
     switch (resizeMethod)
     {
         case ResizeMethod::Bilinear:
             return arm_compute::InterpolationPolicy::BILINEAR;
         case ResizeMethod::NearestNeighbor:
             return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
         default:
             throw InvalidArgumentException("Unsupported resize method");
     }
 }

References Bilinear, and NearestNeighbor.

◆ ConvertWeightTensorFromArmnnToAcl()

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl	(	const ConstTensorHandle *	weightTensor,
		DataLayout	dataLayout,
		void *	permuteBuffer
	)

Definition at line 236 of file WorkloadUtils.cpp.

 {
     // Validates both pointers, permutes the weights when the data layout is NHWC, reorders
     // the channels for NCHW when both the multiplier and the input channel count exceed 1,
     // calls ReshapeWeightsForAcl() and returns the resulting ConstTensor.
     // Throws armnn::InvalidArgumentException if weightTensor or permuteBuffer is null.
     // NOTE(review): both error messages name "PermuteTensor" although they are raised by this function.
     if (weightTensor == nullptr)
     {
         throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
     }
     if (permuteBuffer == nullptr)
     {
         throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
     }
  
     // Dimensions 0 and 1 of the [ M, I, H, W ] weight shape.
     auto multiplier    = weightTensor->GetTensorInfo().GetShape()[0];
     auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];
  
     // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
     // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
  
     // 1. Permute the weights if necessary
     // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
     // starting from the current shape of [ M, I, H, W ]
     // If no permutation is necessary, leave the permutation vector empty
     PermutationVector permutationVector{};
     if (dataLayout == DataLayout::NHWC)
     {
         // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
         permutationVector = { 3, 2, 0, 1 };
     }
     ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
  
     // Shuffle the weights data to obtain the channel order needed by Acl
     if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
     {
         // The reorder helper is instantiated with an element type chosen from the tensor's data type.
         switch (weightPermuted.GetDataType())
         {
             case DataType::Float32:
                 weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
                 break;
             case DataType::Float16:
                 weightPermuted =
                     ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
                 break;
             // Both asymmetric 8-bit types (signed and unsigned) use the uint8_t instantiation.
             case DataType::QAsymmS8:
             case DataType::QAsymmU8:
                 weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
                 break;
             case DataType::QSymmS8:
                 weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
                 break;
             default:
                 // Other data types are left in their current channel order.
                 break;
         }
     }
  
     // 2. Reshape the weights
     // NOTE(review): the call's result is not used here; presumably ReshapeWeightsForAcl updates
     // the TensorInfo it is given in place - confirm against its definition.
     ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
  
     // 3. Return the converted tensor
     // NOTE(review): an earlier comment spoke of also returning the allocated storage, but only
     // the tensor is returned here.
     return weightPermuted;
 }

References Float16, Float32, BaseTensor< MemoryType >::GetDataType(), BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, PermuteTensor(), QAsymmS8, QAsymmU8, QSymmS8, and ReshapeWeightsForAcl().

◆ ConvertWeightTensorInfoFromArmnnToAcl()

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl	(	const TensorInfo &	weightInfo,
		DataLayout	dataLayout
	)

Definition at line 121 of file WorkloadUtils.cpp.

 {
     // TensorInfo-only conversion: permutes a copy of weightInfo when the layout is NHWC,
     // passes it through ReshapeWeightsForAcl() and returns the copy. weightInfo itself is
     // taken by const reference and is not modified.
     // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
     // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
  
     // 1. Permute the weights if necessary
     // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
     // starting from the current shape of [ M, I, H, W ]
     TensorInfo weightPermutedInfo(weightInfo);
     if (dataLayout == DataLayout::NHWC)
     {
         // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
         PermutationVector permutationVector{ 3, 2, 0, 1 };
         weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
     }
  
     // 2. Reshape the weights
     // NOTE(review): the call's result is not used; presumably weightPermutedInfo is updated in place - confirm.
     ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
  
     // 3. Return the permuted weight info
     return weightPermutedInfo;
 }

References NHWC, armnnUtils::Permuted(), and ReshapeWeightsForAcl().

◆ Convolve()

void Convolve	(	const TensorShape &	rInputShape,
		Decoder< float > &	rInputDecoder,
		const TensorShape &	rOutputShape,
		Encoder< float > &	rOutputEncoder,
		const TensorShape &	rFilterShape,
		Decoder< float > &	rFilterDecoder,
		bool	biasEnabled,
		Decoder< float > *	pBiasDecoder,
		DataLayout	dataLayout,
		unsigned int	paddingTop,
		unsigned int	paddingLeft,
		unsigned int	xStride,
		unsigned int	yStride,
		unsigned int	xDilation,
		unsigned int	yDilation,
		bool	depthwise
	)

Definition at line 71 of file ConvImpl.cpp.

 {
     // Convolution over fully decoded float buffers, covering both regular and depthwise
     // convolution (selected by the depthwise flag) and both NHWC and non-NHWC indexing.
     // The input, filter and (optionally) bias tensors are decoded into std::vector<float>,
     // every output element is accumulated in a float, and the result is written through rOutputEncoder.
     // Throws InvalidArgumentException if biasEnabled is true but pBiasDecoder is null.
     if (biasEnabled && !pBiasDecoder)
     {
         throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
     }
     const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
  
     const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
     const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
     const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
  
     // Weights layout:
     // Conv2d:    [O,H,W,I]
     // Depthwise: [1,H,W,O]
     const unsigned int inputChannels   = rInputShape[channelsIndex];
     const unsigned int outputChannels  = rOutputShape[channelsIndex];
     // Integer division. NOTE(review): inputChannels is not checked for zero, and a result of
     // zero (outputChannels < inputChannels) would make the division by depthMultiplier below invalid.
     const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
  
     const unsigned int batchSize    = rOutputShape[0];
     const unsigned int outputHeight = rOutputShape[heightIndex];
     const unsigned int outputWidth  = rOutputShape[widthIndex];
     const unsigned int inputHeight  = rInputShape[heightIndex];
     const unsigned int inputWidth   = rInputShape[widthIndex];
  
     // Depthwise filters always read height/width from dimensions 1 and 2; regular filters
     // use the layout-dependent height/width indices.
     const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
     const unsigned int filterWidth  = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
  
     // Decode whole tensors up front so the loops below index plain float vectors.
     const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
     const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
  
     // The bias is decoded as a 1-D tensor with one value per output channel; empty when bias is disabled.
     const TensorShape biasShape{outputChannels};
     const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
  
     for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
     {
         for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
         {
             for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
             {
                 for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                 {
                     // This loop goes over each output element.
                     float sum = 0.0f;
  
                     // For depthwise, each output channel corresponds to exactly one input channel.
                     // For normal, must loop over each input channel.
                     // In the depthwise case cInput is overwritten inside the loop body, so the
                     // loop variable does not act as a counter there; the body runs for one pass only.
                     for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                     {
                         for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                         {
                             for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                             {
                                 // This loop goes over each input element for each output element.
                                 unsigned int filterIndex = 0;
  
                                 // Since dimensionality of kernel depends on depthwiseness, so does index.
                                 if (depthwise)
                                 {
                                     // Select the single input channel that feeds this output channel.
                                     cInput = cOutput / depthMultiplier;
                                     // filterDepth = outputChannels;
                                     // Flat index into the [1,H,W,O] depthwise filter.
                                     filterIndex = xFilter * outputChannels + cOutput +
                                                   yFilter * filterWidth * outputChannels;
                                 }
                                 else
                                 {
                                     // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
                                     // performance regression.
                                     if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                                     {
                                         // Flat index with the input channel varying fastest.
                                         filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                       yFilter * filterWidth * inputChannels +
                                                       xFilter * inputChannels +
                                                       cInput;
                                     }
                                     else
                                     {
                                         // Flat index with the filter x position varying fastest.
                                         filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                       cInput * filterWidth * filterHeight +
                                                       yFilter * filterWidth +
                                                       xFilter;
                                     }
                                 }
  
                                 // Coordinates in the padded input: the top/left padding is still
                                 // included here and is subtracted when indexing inputVec below.
                                 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
  
                                 float inputValue;
  
                                 // Check if we're in the padding.
                                 // Positions past the bottom/right edge of the input are also treated as
                                 // zero padding by the upper-bound tests.
                                 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                     xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
                                 {
                                     inputValue = 0.0f;
                                 }
                                 else
                                 {
                                     unsigned int inputIndex = 0;
  
                                     // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
                                     // performance regression.
                                     if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                                     {
                                         inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                      (yInput - paddingTop) * inputWidth * inputChannels +
                                                      (xInput - paddingLeft) * inputChannels +
                                                      cInput;
                                     }
                                     else
                                     {
                                         inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                      inputWidth * inputHeight * cInput +
                                                      inputWidth * (yInput - paddingTop) +
                                                      xInput - paddingLeft;
                                     }
                                     inputValue = inputVec[inputIndex];
                                 }
  
                                 sum += filterVec[filterIndex] * inputValue;
                             }
                         }
                     }
  
                     if (biasEnabled)
                     {
                         sum += biasVec[cOutput];
                     }
  
                     // Flat index of the current output element for the active data layout.
                     unsigned int outIdx;
                     if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                     {
                         outIdx =  batchIdx * outputHeight * outputWidth * outputChannels +
                                   yOutput * outputWidth * outputChannels +
                                   xOutput * outputChannels +
                                   cOutput;
                     }
                     else
                     {
                         outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
                                  cOutput * outputHeight * outputWidth +
                                  yOutput * outputWidth +
                                  xOutput;
                     }
  
                     // NOTE(review): presumably operator[] positions the encoder at outIdx so that
                     // Set() writes to that element - confirm against the Encoder interface.
                     rOutputEncoder[outIdx];
                     rOutputEncoder.Set(sum);
                 }
             }
         }
     }
 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NHWC, and Encoder< IType >::Set().

◆ Convolve3d()

void Convolve3d	(	const TensorShape &	rInputShape,
		Decoder< float > &	rInputDecoder,
		const TensorShape &	rOutputShape,
		Encoder< float > &	rOutputEncoder,
		const TensorShape &	rFilterShape,
		Decoder< float > &	rFilterDecoder,
		bool	biasEnabled,
		Decoder< float > *	pBiasDecoder,
		DataLayout	dataLayout,
		unsigned int	paddingTop,
		unsigned int	paddingLeft,
		unsigned int	paddingFront,
		unsigned int	xStride,
		unsigned int	yStride,
		unsigned int	zStride,
		unsigned int	xDilation,
		unsigned int	yDilation,
		unsigned int	zDilation
	)

Definition at line 11 of file Conv3dImpl.cpp.

 {
     // Straightforward nested-loop 3D convolution. The input, the filter and (when enabled)
     // the bias are first decoded into float vectors; every output element is then
     // accumulated in turn and written through rOutputEncoder.
     //
     // Throws InvalidArgumentException when biasEnabled is set but pBiasDecoder is null.
     if (biasEnabled && !pBiasDecoder)
     {
         throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
     }
     const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);

     // Positions of the channel/height/width/depth dimensions inside a shape for this layout.
     const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
     const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
     const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
     const unsigned int depthIndex    = dataLayoutIndexed.GetDepthIndex();

     const unsigned int inChannels   = rInputShape[channelsIndex];
     const unsigned int outChannels  = rOutputShape[channelsIndex];

     // The batch count is read from dimension 0 of the output shape.
     const unsigned int batchSize    = rOutputShape[0];
     const unsigned int outputHeight = rOutputShape[heightIndex];
     const unsigned int outputWidth  = rOutputShape[widthIndex];
     const unsigned int outputDepth  = rOutputShape[depthIndex];
     const unsigned int inputHeight  = rInputShape[heightIndex];
     const unsigned int inputWidth   = rInputShape[widthIndex];
     const unsigned int inputDepth   = rInputShape[depthIndex];

     // Conv3d weights layout: [D,H,W,I,O]
     const unsigned int filterDepth  = rFilterShape[0];
     const unsigned int filterHeight = rFilterShape[1];
     const unsigned int filterWidth  = rFilterShape[2];

     // Decode whole tensors up front so the inner loops index plain float vectors.
     const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
     const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);

     // One bias value per output channel; left empty when bias is disabled.
     const TensorShape biasShape{outChannels};
     const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();

     for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
     {
         for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)
         {
             for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
             {
                 for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
                 {
                     for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)
                     {
                         // This loop goes over each output element.
                         float sum = 0.0f;

                         // Accumulate over the filter volume (depth, height, width) and,
                         // innermost, over every input channel.
                         for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)
                         {
                             for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                             {
                                 for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                                 {
                                     for (unsigned int cInput = 0; cInput < inChannels; cInput++)
                                     {
                                         // This loop goes over each input element for each output element.
                                         unsigned int filterIndex = 0;

                                         // Conv3d weights layout: [D,H,W,I,O]
                                         // Keep this implementation, as using DataLayoutIndexed::GetIndex
                                         // causes large performance regression.
                                         filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +
                                                       yFilter * filterWidth * inChannels * outChannels +
                                                       xFilter * inChannels * outChannels +
                                                       cInput * outChannels +
                                                       cOutput;

                                         // Coordinates in the padded input space: output position scaled by
                                         // the stride plus the (dilated) filter tap offset.
                                         unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                         unsigned int xInput = xOutput * xStride + xFilter * xDilation;
                                         unsigned int zInput = zOutput * zStride + zFilter * zDilation;

                                         float inputValue;

                                         // Check if we're in the padding.
                                         // The coordinates are unsigned, so the lower bound is tested against
                                         // the padding amount rather than by subtracting it first.
                                         if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                             xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||
                                             zInput < paddingFront || zInput >= inputDepth + paddingFront)
                                         {
                                             // Padded positions contribute zero.
                                             inputValue = 0.0f;
                                         }
                                         else
                                         {
                                             unsigned int inputIndex = 0;

                                             // Keep this implementation, as using DataLayoutIndexed::GetIndex
                                             // causes large performance regression.
                                             if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
                                             {
                                                 // Channels are the fastest-varying dimension.
                                                 inputIndex =
                                                         batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
                                                         (zInput-paddingFront) * inputHeight * inputWidth * inChannels +
                                                         (yInput-paddingTop) * inputWidth * inChannels +
                                                         (xInput-paddingLeft) * inChannels +
                                                         cInput;
                                             }
                                             else
                                             {
                                                 // NCDHW DataLayout
                                                 // Width is the fastest-varying dimension.
                                                 inputIndex =
                                                         batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
                                                         inputDepth * inputHeight * inputWidth * cInput +
                                                         (zInput-paddingFront) * inputHeight * inputWidth +
                                                         (yInput-paddingTop) * inputWidth +
                                                         xInput-paddingLeft;
                                             }

                                             inputValue = inputVec[inputIndex];
                                         }

                                         sum += filterVec[filterIndex] * inputValue;
                                     }
                                 }
                             }
                         }

                         if (biasEnabled)
                         {
                             sum += biasVec[cOutput];
                         }

                         // Flat index of this output element, using the same layout rules as the input.
                         unsigned int outIdx;
                         if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
                         {
                             outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
                                      zOutput * outputHeight * outputWidth * outChannels +
                                      yOutput * outputWidth * outChannels +
                                      xOutput * outChannels +
                                      cOutput;
                         }
                         else
                         {
                             // NCDHW DataLayout
                             outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
                                      cOutput * outputDepth * outputHeight * outputWidth +
                                      zOutput * outputHeight * outputWidth +
                                      yOutput * outputWidth +
                                      xOutput;
                         }

                         // The value of operator[] is discarded; it is called only for its effect on
                         // the encoder (presumably positioning it at outIdx - confirm against Encoder)
                         // before Set() stores the result.
                         rOutputEncoder[outIdx];
                         rOutputEncoder.Set(sum);
                     }
                 }
             }
         }
     }
 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NDHWC, and Encoder< IType >::Set().

◆ CoordinatesToIndex()

uint32_t armnn::CoordinatesToIndex	(	TensorShape &	shape,
		std::vector< uint32_t > &	coordinates
	)

Definition at line 32 of file Tile.cpp.

 {
     uint32_t index = 0;
     uint32_t base = 1;
     uint32_t rank = shape.GetNumDimensions();
     for (uint32_t i = rank; i > 0; --i)
     {
         index = index + coordinates[i - 1] * base;
         base = base * shape[i - 1];
     }
     return index;
 }

References TensorShape::GetNumDimensions().

Referenced by Tile().

◆ CopyArmComputeClTensorData()

void CopyArmComputeClTensorData	(	arm_compute::CLTensor &	dstTensor,
		const T *	srcData
	)

Definition at line 64 of file ClWorkloadUtils.hpp.

 {
     // Copies srcData into dstTensor: map the CL tensor, copy, then unmap.
     // Each of the first two steps sits in its own scope together with a scoped
     // profiling event, so the event object is destroyed as soon as that step finishes.
     {
         ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
         // NOTE(review): the `true` argument is presumably the "blocking" flag of
         // arm_compute::CLTensor::map - confirm against the ACL API.
         dstTensor.map(true);
     }

     {
         ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
         armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
     }

     // Unmapping is not covered by either profiling event.
     dstTensor.unmap();
 }

References ARMNN_SCOPED_PROFILING_EVENT_CL.

Referenced by ClConstantWorkload::Execute(), and GpuFsaConstantWorkload::Execute().

◆ CopyArmComputeTensorData()

void armnn::CopyArmComputeTensorData	(	arm_compute::Tensor &	dstTensor,
		const T *	srcData
	)

Definition at line 62 of file NeonWorkloadUtils.hpp.

 {
     // Prepares dstTensor via InitialiseArmComputeTensorEmpty() and then copies srcData into it.
     // NOTE(review): the initialise call presumably allocates the tensor's backing memory - confirm.
     InitialiseArmComputeTensorEmpty(dstTensor);
     CopyArmComputeITensorData(srcData, dstTensor);
 }

Referenced by InitializeArmComputeTensorData().

◆ CopyTensorContentsGeneric()

void armnn::CopyTensorContentsGeneric	(	const ITensorHandle *	srcTensor,
		ITensorHandle *	dstTensor,
		CopyFunc	copy
	)

Definition at line 46 of file WorkloadUtils.hpp.

 {
     // Copies the overlapping region of srcTensor into dstTensor using the supplied
     // `copy(dst, src, lengthInBytes)` callable, honouring each tensor's own strides.
     // Both tensors are mapped for the duration of the copy and unmapped at the end.
     //
     // Throws MemoryValidationException when either mapped pointer is null, or when the
     // length of a single copy exceeds the size computed for the source or destination.

     // For ease of understanding, names are assigned to the dimensions
     // of the tensor as if NHWC, however this routine works with any 5D tensor
     static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

     // Sizes are computed as "stride of dimension 0 times extent of dimension 0".
     // Strides are byte offsets: they are added directly to unsigned char pointers below.
     TensorShape srcStrides      = srcTensor->GetStrides();
     const TensorShape& srcShape = srcTensor->GetShape();
     const auto srcSize          = srcTensor->GetStrides()[0] * srcShape[0];
     TensorShape dstStrides      = dstTensor->GetStrides();
     const TensorShape& dstShape = dstTensor->GetShape();
     const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];

     // Extents default to 1 and strides default to 0 before AssignValues() is called.
     // NOTE(review): AssignValues is defined elsewhere; it appears to fill the named
     // references from the shape's dimensions and leave the defaults for dimensions the
     // tensor does not have - confirm.
     size_t srcDepth    = 1;
     size_t srcBatches  = 1;
     size_t srcHeight   = 1;
     size_t srcWidth    = 1;
     size_t srcChannels = 1;
     AssignValues(srcShape.GetNumDimensions(),
                  0,
                  srcShape,
                  srcChannels,
                  srcWidth,
                  srcHeight,
                  srcBatches,
                  srcDepth);

     size_t srcDepthStride   = 0;
     size_t srcBatchStride   = 0;
     size_t srcHeightStride  = 0;
     size_t srcWidthStride   = 0;
     size_t srcChannelStride = 0;
     AssignValues(srcStrides.GetNumDimensions(),
                  0,
                  srcStrides,
                  srcChannelStride,
                  srcWidthStride,
                  srcHeightStride,
                  srcBatchStride,
                  srcDepthStride);

     size_t dstDepth    = 1;
     size_t dstBatches  = 1;
     size_t dstHeight   = 1;
     size_t dstWidth    = 1;
     size_t dstChannels = 1;
     AssignValues(dstShape.GetNumDimensions(),
                  0,
                  dstShape,
                  dstChannels,
                  dstWidth,
                  dstHeight,
                  dstBatches,
                  dstDepth);

     size_t dstDepthStride   = 0;
     size_t dstBatchStride   = 0;
     size_t dstHeightStride  = 0;
     size_t dstWidthStride   = 0;
     size_t dstChannelStride = 0;
     AssignValues(dstStrides.GetNumDimensions(),
                  0,
                  dstStrides,
                  dstChannelStride,
                  dstWidthStride,
                  dstHeightStride,
                  dstBatchStride,
                  dstDepthStride);

     // Map both tensors; the profiling event covers only the two Map() calls.
     const unsigned char* srcDataStart;
     unsigned char* dstDataStart;
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
         srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
         dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
     }
     // Note: these throws leave without calling Unmap() on either tensor.
     if (srcDataStart == nullptr)
     {
         throw MemoryValidationException("The source tensor is null.");
     }
     if (dstDataStart == nullptr)
     {
         throw MemoryValidationException("The destination tensor is null.");
     }

     // Only the region common to both tensors is copied: the smaller extent wins in
     // every dimension, and the innermost copy length is the smaller channel run in bytes.
     size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
     size_t copyWidth   = std::min(srcWidth, dstWidth);
     size_t copyHeight  = std::min(srcHeight, dstHeight);
     size_t copyBatches = std::min(srcBatches, dstBatches);
     size_t copyDepth   = std::min(srcDepth, dstDepth);

     // Coalesce inner dimensions where possible
     // to reduce overhead calling copy() and to
     // allow for memory bandwidth optimisations
     if (copyLength == srcWidthStride &&
         copyLength == dstWidthStride)
     {
         // There is no special padding between rows,
         // and sizes are compatible, so copy whole rows
         copyLength *= copyWidth;
         copyWidth = 1;

         if (copyLength == srcHeightStride &&
             copyLength == dstHeightStride)
         {
             // There is no special padding between batches
             // and sizes are compatible so copy whole batches
             copyLength *= copyHeight;
             copyHeight = 1;
         }
     }

     // Walk both buffers with a single running pointer each. At every loop level the
     // pointer value on entry is remembered so that, once the inner loops finish, the
     // pointer can be moved to exactly "entry + stride of this level", regardless of
     // how far the inner loops advanced it.
     const unsigned char* srcData = srcDataStart;
     unsigned char* dstData = dstDataStart;
     for (unsigned int d = 0; d < copyDepth; ++d)
     {
         auto srcPtrDepth = srcData;
         auto dstPtrDepth = dstData;
         for (unsigned int b = 0; b < copyBatches; ++b)
         {
             auto srcPtrBatch = srcData;
             auto dstPtrBatch = dstData;
             for (unsigned int h = 0; h < copyHeight; ++h)
             {
                 auto srcPtrChannel = srcData;
                 auto dstPtrChannel = dstData;
                 for (unsigned int w = 0; w < copyWidth; ++w)
                 {
                     // Sanity check the memory area we've been asked to copy from and to.
                     // (copyLength, srcSize and dstSize do not change inside these loops.)
                     if (copyLength > srcSize)
                     {
                         throw MemoryValidationException(
                             "The source tensor size does not match the size of the allocated tensor.");
                     }
                     if (copyLength > dstSize)
                     {
                         throw MemoryValidationException(
                             "The destination tensor size will overrun the destination tensor.");
                     }
                     copy(dstData, srcData, copyLength);
                     dstData += dstWidthStride;
                     srcData += srcWidthStride;
                 }
                 // Net effect: pointer = row start + height stride.
                 dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                 srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
             }
             // Net effect: pointer = batch start + batch stride.
             dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
             srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
         }
         // Net effect: pointer = depth-slice start + depth stride.
         dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
         srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
     }

     srcTensor->Unmap();
     dstTensor->Unmap();
 }

References ARMNN_SCOPED_PROFILING_EVENT, TensorShape::GetNumDimensions(), ITensorHandle::GetShape(), ITensorHandle::GetStrides(), ITensorHandle::Map(), MaxNumOfTensorDimensions, Undefined, and ITensorHandle::Unmap().

Referenced by CopyToOutputTensor(), CopyMemGenericWorkload::Execute(), NeonConvertFp32ToFp16Workload::Execute(), NeonConvertFp16ToFp32Workload::Execute(), and CopyMemGenericWorkload::ExecuteAsync().

◆ CopyToOutputTensor()

void armnn::CopyToOutputTensor	(	const Tensor &	outputTensor,
		ITensorHandle *	outputTensorHandle
	)

Definition at line 1388 of file LoadedNetwork.cpp.

 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
     auto copyFunc = [](void* dst, const void* src, size_t size)
     {
         memcpy(dst, src, size);
     };
  
     std::unique_ptr<ITensorHandle> tensorHandle =
             std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                       outputTensor.GetMemoryArea());
  
     CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
 }

References ARMNN_SCOPED_PROFILING_EVENT, CopyTensorContentsGeneric(), BaseTensor< MemoryType >::GetInfo(), BaseTensor< MemoryType >::GetMemoryArea(), and Undefined.

Referenced by LoadedNetwork::Execute().

◆ CreateAclNormalizationLayerInfoForL2Normalization()

arm_compute::NormalizationLayerInfo armnn::CreateAclNormalizationLayerInfoForL2Normalization	(	const armnn::TensorInfo &	tensorInfo,
		armnn::DataLayout	dataLayout
	)

inline

Definition at line 29 of file ArmComputeUtils.hpp.

 {
     unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
     const unsigned int depth = tensorInfo.GetShape()[depthDimension];
  
     // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
     // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
     // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully chosing the normalization
     // parameters.
     //
     // Please refer to both the reference implementation of the normalization layer and the implementation of
     // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
  
     // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
     // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
     // ACL's normalization_layer_cross_map() CL function.
     const uint32_t normSize = depth * 2u + 1u;
  
     // See ACL's NormalizationLayerInfo::scale_coeff() definition.
     // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
     const float alpha = 1.0f;
  
     // Don't offset the reduction.
     const float kappa = 0.0f;
  
     // pow(reduction, -0.5) = 1 / sqrt(reduction)
     const float beta = 0.5f;
  
     return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
 }

References TensorInfo::GetShape(), and NCHW.

◆ CreateClContext()

flatbuffers::Offset<ClContext> armnn::CreateClContext	(	flatbuffers::FlatBufferBuilder &	_fbb,
		flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>>	programs = `0`
	)

inline

Definition at line 57 of file ClContextSchema_generated.h.

                                                                                         {
   // Generated FlatBuffers helper: writes a ClContext table into _fbb holding the given
   // `programs` vector offset and returns the offset produced by Finish().
   ClContextBuilder builder_(_fbb);
   builder_.add_programs(programs);
   return builder_.Finish();
 }

References ClContextBuilder::add_programs(), and ClContextBuilder::Finish().

Referenced by CreateClContextDirect(), and ClContextSerializer::Serialize().

◆ CreateClContextDirect()

flatbuffers::Offset<ClContext> armnn::CreateClContextDirect	(	flatbuffers::FlatBufferBuilder &	_fbb,
		const std::vector< flatbuffers::Offset< armnn::Program >> *	programs = `nullptr`
	)

inline

Definition at line 65 of file ClContextSchema_generated.h.

                                                                           {
   // Convenience overload taking a std::vector: when `programs` is non-null it is first
   // serialised into _fbb with CreateVector; a null pointer yields offset 0 instead.
   auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<armnn::Program>>(*programs) : 0;
   return armnn::CreateClContext(
       _fbb,
       programs__);
 }

References CreateClContext().

◆ CreateDescriptorForConcatenation()

OriginsDescriptor armnn::CreateDescriptorForConcatenation	(	TensorShapeIt	first,
		TensorShapeIt	last,
		unsigned int	concatenationDimension
	)

Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.

Definition at line 300 of file Descriptors.hpp.

 {
     auto numInputs = std::distance(first, last);
  
     if (numInputs < 2)
     {
         throw InvalidArgumentException("Concatenation requires at least 2 inputs");
     }
  
     const auto& firstInputShape = *first;
  
     const unsigned int numDimensions = firstInputShape.GetNumDimensions();
     for (auto it = first + 1; it != last; ++it)
     {
         if (it->GetNumDimensions() != numDimensions)
         {
             throw InvalidArgumentException("All inputs to concatenation must have the same number of dimensions");
         }
     }
  
     if (concatenationDimension >= numDimensions)
     {
         throw InvalidArgumentException("concatenationDimension must be between 0 and the number of dimensions.");
     }
  
     for (auto it = first; it != last; ++it)
     {
         for (unsigned int d = 0; d < numDimensions; ++d)
         {
             const bool dimSizeOk = (d == concatenationDimension) || (firstInputShape[d] == (*it)[d]);
             if (!dimSizeOk)
             {
                 throw InvalidArgumentException("All inputs to concatenation must be the same size along all dimensions "
                     " except the concatenation dimension");
             }
         }
     }
  
     OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
     viewsDescriptor.SetConcatAxis(concatenationDimension);
  
     uint32_t viewIndex = 0u;
     uint32_t coordAlongConcatDim = 0u;
     for (auto it = first; it != last; ++it)
     {
         const auto& inputShape = *it;
  
         for (unsigned int i = 0; i < concatenationDimension; ++i)
         {
             viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
         }
  
         viewsDescriptor.SetViewOriginCoord(viewIndex, concatenationDimension, coordAlongConcatDim);
         unsigned int dimSize = inputShape[concatenationDimension];
         coordAlongConcatDim += dimSize;
  
  
         for (unsigned int i = concatenationDimension + 1; i < numDimensions; ++i)
         {
             viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
         }
  
         ++viewIndex;
     }
  
     return viewsDescriptor;
 }

References OriginsDescriptor::SetConcatAxis(), and OriginsDescriptor::SetViewOriginCoord().

◆ CreateInputsFrom()

SubgraphView::InputSlots armnn::CreateInputsFrom ( Layer * layer )

inline

Definition at line 42 of file GpuFsaBackend.cpp.

 {
     SubgraphView::InputSlots result;
     for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
     {
         result.push_back(&(*it));
     }
     return result;
 }

References Layer::BeginInputSlots(), and Layer::EndInputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateOutputsFrom()

SubgraphView::OutputSlots armnn::CreateOutputsFrom ( Layer * layer )

inline

Definition at line 52 of file GpuFsaBackend.cpp.

 {
     SubgraphView::OutputSlots result;
     for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
     {
         result.push_back(&(*it));
     }
     return result;
 }

References Layer::BeginOutputSlots(), and Layer::EndOutputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateProgram()

flatbuffers::Offset<Program> armnn::CreateProgram	(	flatbuffers::FlatBufferBuilder &	_fbb,
		flatbuffers::Offset< flatbuffers::String >	name = `0`,
		flatbuffers::Offset< flatbuffers::Vector< uint8_t >>	binary = `0`
	)

inline

Definition at line 118 of file ClContextSchema_generated.h.

                                                               {
   // Generated FlatBuffers helper: writes a Program table into _fbb from the given
   // `binary` and `name` offsets and returns the offset produced by Finish().
   ProgramBuilder builder_(_fbb);
   builder_.add_binary(binary);
   builder_.add_name(name);
   return builder_.Finish();
 }

References ProgramBuilder::add_binary(), ProgramBuilder::add_name(), and ProgramBuilder::Finish().

Referenced by CreateProgramDirect(), and ClContextSerializer::Serialize().

◆ CreateProgramDirect()

flatbuffers::Offset<Program> armnn::CreateProgramDirect	(	flatbuffers::FlatBufferBuilder &	_fbb,
		const char *	name = `nullptr`,
		const std::vector< uint8_t > *	binary = `nullptr`
	)

inline

Definition at line 128 of file ClContextSchema_generated.h.

                                                 {
   // Convenience overload taking a C string and a byte vector: each non-null argument is
   // serialised into _fbb first; a null argument yields offset 0 for that field.
   auto name__ = name ? _fbb.CreateString(name) : 0;
   auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
   return armnn::CreateProgram(
       _fbb,
       name__,
       binary__);
 }

References CreateProgram().

◆ CreateSubgraphViewFrom()

SubgraphView::SubgraphViewPtr armnn::CreateSubgraphViewFrom	(	SubgraphView::InputSlots &&	inputs,
		SubgraphView::OutputSlots &&	outputs,
		SubgraphView::Layers &&	layers
	)

inline

Definition at line 62 of file GpuFsaBackend.cpp.

 {
     // Moves the three collections into a newly heap-allocated SubgraphView and returns
     // the owning pointer; the arguments are left in a moved-from state.
     return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
 }

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateSupportedBackends()

BackendsMap CreateSupportedBackends	(	TensorHandleFactoryRegistry &	handleFactoryRegistry,
		BackendSettings &	backendSettings
	)

Definition at line 1309 of file Network.cpp.

 {
     BackendsMap backends;
     auto const& backendRegistry = BackendRegistryInstance();
     for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
     {
         auto backendFactory = backendRegistry.GetFactory(selectedBackend);
         auto backendObjPtr = backendFactory();
         ARMNN_ASSERT(backendObjPtr);
  
         backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
  
         backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
     }
  
     return backends;
 }

References ARMNN_ASSERT, BackendRegistryInstance(), and BackendSettings::m_SupportedBackends.

Referenced by Optimize().

◆ Debug()

void Debug	(	const TensorInfo &	inputInfo,
		const T *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

Definition at line 97 of file Debug.cpp.

 {
     if (outputsToFile)
     {
 #if !defined(ARMNN_DISABLE_FILESYSTEM)
         fs::path tmpDir = fs::temp_directory_path();
         std::ofstream out(tmpDir.generic_string() + "/ArmNNIntermediateLayerOutputs/" + layerName + ".numpy");
         PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, out);
         out.close();
 #endif
     }
     else
     {
         PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, std::cout);
     }
 }

◆ Debug< BFloat16 >()

template void armnn::Debug< BFloat16 >	(	const TensorInfo &	inputInfo,
		const BFloat16 *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< float >()

template void armnn::Debug< float >	(	const TensorInfo &	inputInfo,
		const float *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< Half >()

template void armnn::Debug< Half >	(	const TensorInfo &	inputInfo,
		const Half *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< int16_t >()

template void armnn::Debug< int16_t >	(	const TensorInfo &	inputInfo,
		const int16_t *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< int32_t >()

template void armnn::Debug< int32_t >	(	const TensorInfo &	inputInfo,
		const int32_t *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< int64_t >()

template void armnn::Debug< int64_t >	(	const TensorInfo &	inputInfo,
		const int64_t *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< int8_t >()

template void armnn::Debug< int8_t >	(	const TensorInfo &	inputInfo,
		const int8_t *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ Debug< uint8_t >()

template void armnn::Debug< uint8_t >	(	const TensorInfo &	inputInfo,
		const uint8_t *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		bool	outputsToFile
	)

◆ DeleteAsType()

void DeleteAsType ( const void *const blob )

inline

Definition at line 37 of file GpuFsaBackend.cpp.

 {
     // Type-erased deleter: reinterprets the untyped pointer as `const T*` and deletes it.
     // NOTE(review): presumably the caller guarantees that blob was allocated with `new` as a T - confirm.
     delete static_cast<const T*>(blob);
 }

◆ DepthToSpace()

void DepthToSpace	(	const TensorInfo &	inputInfo,
		const DepthToSpaceDescriptor &	descriptor,
		const void *	inputData,
		void *	outputData,
		unsigned int	dataTypeSize
	)

Definition at line 18 of file DepthToSpace.cpp.

 {
     const unsigned int blockSize = descriptor.m_BlockSize;
     ARMNN_ASSERT(blockSize != 0u);
  
     const TensorShape& inputShape = inputInfo.GetShape();
     const unsigned int batches = inputShape[0];
  
     armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
     const unsigned int inDepth  = inputShape[dataLayoutIndexed.GetChannelsIndex()];
     const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
     const unsigned int inWidth  = inputShape[dataLayoutIndexed.GetWidthIndex()];
  
     const unsigned int outDepth = inDepth / (blockSize * blockSize);
  
     // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
     //
     // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
     // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
     //
     // DepthToSpace can then be implemented as a permutation in 6D resulting in
     // the following shapes:
     //
     // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
     // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
     //
     // NOTE:
     // Since 6D tensors are not currently supported, in practice we need to handle each
     // batch separately and execute 5D permutations
  
     TensorShape permDestShape;
     PermutationVector permVector{};
     if (descriptor.m_DataLayout == DataLayout::NCHW)
     {
         permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
         permVector    = { 2, 4, 0, 1, 3 };
     }
     else
     {
         permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
         permVector    = { 0, 2, 1, 3, 4 };
     }
  
     const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
  
     for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
     {
         const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
  
         armnnUtils::Permute(permDestShape,
                             permVector,
                             static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
                             static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
                             dataTypeSize);
     }
 }

References ARMNN_ASSERT, DepthToSpace(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumElements(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, and armnnUtils::Permute().

Referenced by DepthToSpace().

◆ Dequantize() [1/4]

void armnn::Dequantize	(	const T *	inputData,
		float *	outputData,
		const TensorInfo &	info
	)

inline

Definition at line 113 of file RefWorkloadUtils.hpp.

 {
     for (unsigned int i = 0; i < info.GetNumElements(); i++)
     {
         outputData[i] = Dequantize<T>(inputData[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
     }
 }

References info.

◆ Dequantize() [2/4]

std::vector<float> armnn::Dequantize	(	const T *	quant,
		const TensorInfo &	info
	)

u8 helpers

Definition at line 102 of file RefWorkloadUtils.hpp.

 {
     std::vector<float> ret(info.GetNumElements());
     for (size_t i = 0; i < info.GetNumElements(); i++)
     {
         ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
     }
     return ret;
 }

References Dequantize(), and info.

◆ Dequantize() [3/4]

void Dequantize	(	Decoder< float > &	inputDecoder,
		Encoder< float > &	outputEncoder,
		const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo
	)

Definition at line 13 of file Dequantize.cpp.

 {
     IgnoreUnused(outputInfo);
     ARMNN_ASSERT(inputInfo.GetNumElements() == outputInfo.GetNumElements());
     for (unsigned int i = 0; i < inputInfo.GetNumElements(); i++)
     {
         // inputDecoder.Get() dequantizes the data element from whatever
         // type is given by inputInfo to fp32 (If MakeDecoder supports that dequantization)
         // outputEncoder.Set() transforms the data element to whatever type is
         // given by outputInfo (if MakeEncoder supports that transformation)
         outputEncoder.Set(inputDecoder.Get());
         ++outputEncoder;
         ++inputDecoder;
     }
 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumElements(), IgnoreUnused(), and Encoder< IType >::Set().

◆ Dequantize() [4/4]

float Dequantize	(	QuantizedType	value,
		float	scale,
		int32_t	offset
	)

Dequantize an 8-bit data type into a floating point data type.

Parameters

value	- The value to dequantize.
scale	- The scale (must be non-zero).
offset	- The offset.

Returns: - The dequantized value calculated as (value-offset)*scale.

Definition at line 52 of file TypesUtils.cpp.

 {
     static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
     if (scale == 0.f)
     {
         throw armnn::InvalidArgumentException("Dequantize: Scale cannot be 0.f");
     }
     if (std::isnan(value))
     {
         throw armnn::InvalidArgumentException("Dequantize: Value is NaN");
     }
     return (armnn::numeric_cast<float>(value - offset)) * scale;
 }

Referenced by QASymm8Decoder::DecodeTensor(), QASymmS8Decoder::DecodeTensor(), QSymmS8Decoder::DecodeTensor(), QSymm16Decoder::DecodeTensor(), QSymm8PerAxisDecoder::DecodeTensor(), ScaledInt32PerAxisDecoder::DecodeTensor(), SelectiveQuantizer< T, DoQuantize >::Dequantize(), Dequantize(), QASymm8Decoder::Get(), QASymmS8Decoder::Get(), QSymmS8Decoder::Get(), QSymm16Decoder::Get(), QASymm8Encoder::Get(), QASymmS8Encoder::Get(), QSymmS8Encoder::Get(), QSymm16Encoder::Get(), QSymm8PerAxisDecoder::Get(), QSymm8PerAxisEncoder::Get(), ScaledInt32PerAxisDecoder::Get(), and QSymm16PerAxisEncoder::Get().

◆ DetectionPostProcess()

void DetectionPostProcess	(	const TensorInfo &	boxEncodingsInfo,
		const TensorInfo &	scoresInfo,
		const TensorInfo &	anchorsInfo,
		const TensorInfo &	detectionBoxesInfo,
		const TensorInfo &	detectionClassesInfo,
		const TensorInfo &	detectionScoresInfo,
		const TensorInfo &	numDetectionsInfo,
		const DetectionPostProcessDescriptor &	desc,
		Decoder< float > &	boxEncodings,
		Decoder< float > &	scores,
		Decoder< float > &	anchors,
		float *	detectionBoxes,
		float *	detectionClasses,
		float *	detectionScores,
		float *	numDetections
	)

Definition at line 141 of file DetectionPostProcess.cpp.

 {
     IgnoreUnused(anchorsInfo, detectionClassesInfo, detectionScoresInfo, numDetectionsInfo);
  
     // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
     // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
     std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
  
     const unsigned int numBoxes  = boxEncodingsInfo.GetShape()[1];
     const unsigned int numScores = scoresInfo.GetNumElements();
  
     for (unsigned int i = 0; i < numBoxes; ++i)
     {
         // Y
         float boxEncodingY = boxEncodings.Get();
         float anchorY      = anchors.Get();
  
         ++boxEncodings;
         ++anchors;
  
         // X
         float boxEncodingX = boxEncodings.Get();
         float anchorX      = anchors.Get();
  
         ++boxEncodings;
         ++anchors;
  
         // H
         float boxEncodingH = boxEncodings.Get();
         float anchorH      = anchors.Get();
  
         ++boxEncodings;
         ++anchors;
  
         // W
         float boxEncodingW = boxEncodings.Get();
         float anchorW      = anchors.Get();
  
         ++boxEncodings;
         ++anchors;
  
         float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
         float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
  
         float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
         float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
  
         unsigned int indexY = i * 4;
         unsigned int indexX = indexY + 1;
         unsigned int indexH = indexX + 1;
         unsigned int indexW = indexH + 1;
  
         // ymin
         boxCorners[indexY] = yCentre - halfH;
         // xmin
         boxCorners[indexX] = xCentre - halfW;
         // ymax
         boxCorners[indexH] = yCentre + halfH;
         // xmax
         boxCorners[indexW] = xCentre + halfW;
  
         ARMNN_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
         ARMNN_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
     }
  
     unsigned int numClassesWithBg = desc.m_NumClasses + 1;
  
     // Decode scores
     std::vector<float> decodedScores;
     decodedScores.reserve(numScores);
  
     for (unsigned int i = 0u; i < numScores; ++i)
     {
         decodedScores.emplace_back(scores.Get());
         ++scores;
     }
  
     // Perform Non Max Suppression.
     if (desc.m_UseRegularNms)
     {
         // Perform Regular NMS.
         // For each class, perform NMS and select max detection numbers of the highest score across all classes.
         std::vector<float> classScores(numBoxes);
  
         std::vector<unsigned int> selectedBoxesAfterNms;
         selectedBoxesAfterNms.reserve(numBoxes);
  
         std::vector<float> selectedScoresAfterNms;
         selectedBoxesAfterNms.reserve(numScores);
  
         std::vector<unsigned int> selectedClasses;
  
         for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
         {
             // For each boxes, get scores of the boxes for the class c.
             for (unsigned int i = 0; i < numBoxes; ++i)
             {
                 classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
             }
             std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
                                                                           boxCorners,
                                                                           classScores,
                                                                           desc.m_NmsScoreThreshold,
                                                                           desc.m_DetectionsPerClass,
                                                                           desc.m_NmsIouThreshold);
  
             for (unsigned int i = 0; i < selectedIndices.size(); ++i)
             {
                 selectedBoxesAfterNms.push_back(selectedIndices[i]);
                 selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
                 selectedClasses.push_back(c);
             }
         }
  
         // Select max detection numbers of the highest score across all classes
         unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
         unsigned int numOutput = std::min(desc.m_MaxDetections,  numSelected);
  
         // Sort the max scores among the selected indices.
         std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
         TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
  
         AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
                            selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
                            detectionBoxes, detectionScores, detectionClasses, numDetections);
     }
     else
     {
         // Perform Fast NMS.
         // Select max scores of boxes and perform NMS on max scores,
         // select max detection numbers of the highest score
         unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
         std::vector<float> maxScores;
         std::vector<unsigned int>boxIndices;
         std::vector<unsigned int>maxScoreClasses;
  
         for (unsigned int box = 0; box < numBoxes; ++box)
         {
             unsigned int scoreIndex = box * numClassesWithBg + 1;
  
             // Get the max scores of the box.
             std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
             TopKSort(numClassesPerBox, maxScoreIndices.data(),
                 decodedScores.data() + scoreIndex, desc.m_NumClasses);
  
             for (unsigned int i = 0; i < numClassesPerBox; ++i)
             {
                 maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
                 maxScoreClasses.push_back(maxScoreIndices[i]);
                 boxIndices.push_back(box);
             }
         }
  
         // Perform NMS on max scores
         std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
                                                                       desc.m_NmsScoreThreshold,
                                                                       desc.m_MaxDetections,
                                                                       desc.m_NmsIouThreshold);
  
         unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
         unsigned int numOutput = std::min(desc.m_MaxDetections,  numSelected);
  
         AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
                            boxIndices, maxScoreClasses, maxScores,
                            detectionBoxes, detectionScores, detectionClasses, numDetections);
     }
 }

◆ ExecuteFunction()

void armnn::ExecuteFunction	(	std::vector< ITensorHandle * >	inputs,
		std::vector< ITensorHandle * >	outputs,
		BinaryOperation	operation
	)

Definition at line 27 of file RefElementwiseBinaryWorkload.cpp.

 {
     const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
     const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
     const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
  
     const TensorShape& inShape0 = inputInfo0.GetShape();
     const TensorShape& inShape1 = inputInfo1.GetShape();
     const TensorShape& outShape = outputInfo.GetShape();
  
     std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
     std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
     std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
  
     using AddFunction     = ElementwiseBinaryFunction<std::plus<DataType>>;
     using DivFunction     = ElementwiseBinaryFunction<std::divides<DataType>>;
     using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
     using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
     using MulFunction     = ElementwiseBinaryFunction<std::multiplies<DataType>>;
     using SubFunction     = ElementwiseBinaryFunction<std::minus<DataType>>;
     using SqDiffFunction  = ElementwiseBinaryFunction<armnn::squaredDifference<DataType>>;
     using PowerFunction   = ElementwiseBinaryFunction<armnn::power<DataType>>;
  
     switch (operation)
     {
         case BinaryOperation::Add:
         {
             AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Div:
         {
             DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Maximum:
         {
             MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Minimum:
         {
             MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Mul:
         {
             MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Sub:
         {
             SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::SqDiff:
         {
             SqDiffFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         case BinaryOperation::Power:
         {
             PowerFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
         default:
         {
             throw InvalidArgumentException(std::string("Unsupported binary operation ") +
                                            GetBinaryOperationAsCString(operation), CHECK_LOCATION());
         }
     }
 }

References Add, CHECK_LOCATION, Div, GetBinaryOperationAsCString(), TensorInfo::GetShape(), GetTensorInfo(), Map, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

◆ ExtractJsonObjects()

void armnn::ExtractJsonObjects	(	unsigned int	inferenceIndex,
		const Event *	parentEvent,
		JsonChildObject &	parentObject,
		std::map< const Event *, std::vector< const Event * >>	descendantsMap
	)

Definition at line 303 of file Profiling.cpp.

 {
     // Recursively copies the measurements of parentEvent, and of its descendants found in
     // descendantsMap, into the JSON tree rooted at parentObject. On the first inference
     // (inferenceIndex == 0) the child objects are created; on later inferences the
     // existing children are only given additional measurement values.
     ARMNN_ASSERT(parentEvent);
  
     // If profiling GUID is entered, process it
     if (parentEvent->GetProfilingGuid().has_value())
     {
         arm::pipe::ProfilingGuid profilingGuid;
         profilingGuid = parentEvent->GetProfilingGuid().value();
         parentObject.SetGuid(profilingGuid);
     }
     std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
     // childIdx is the index into parentObject's children that receives the next measurement/event.
     unsigned int childIdx = 0;
     unsigned int numSkippedKernels = 0;
     if (inferenceIndex > 0)
     {
         // On later inferences, if any instrument reports kernel measurements, start writing at
         // (existing children - current measurements). NOTE(review): this unsigned subtraction
         // wraps if there are fewer children than measurements - confirm that cannot happen.
         for (auto &i: parentEvent->GetInstruments())
         {
             if (i->HasKernelMeasurements())
             {
                 numSkippedKernels = static_cast<unsigned int>(parentObject.m_Children.size() -
                                                                instrumentMeasurements.size());
                 childIdx = numSkippedKernels;
             }
         }
     }
  
     // childIdx advances together with measurementIndex.
     for (size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
     {
         if (inferenceIndex == 0)
         {
             // Only add kernel measurement once, in case of multiple inferences
             JsonChildObject measurementObject{ instrumentMeasurements[measurementIndex].m_Name };
             measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
             measurementObject.SetType(JsonObjectType::Measurement);
  
             // The new child must land exactly at childIdx so the GetChild(childIdx) below hits it.
             ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
             parentObject.AddChild(measurementObject);
         }
         else
         {
             // Each skipped child (walking down from index numSkippedKernels - 1) receives a 0.0
             // entry; presumably this keeps its measurement list aligned across inferences.
             if (numSkippedKernels > 0)
             {
                 parentObject.GetChild(--numSkippedKernels).AddMeasurement(0.0);
             }
         }
  
         parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
     }
  
     // Descend into the child events registered for parentEvent, if any.
     auto childEventsIt = descendantsMap.find(parentEvent);
     if (childEventsIt != descendantsMap.end())
     {
         for (auto childEvent : childEventsIt->second)
         {
             if (inferenceIndex == 0)
             {
                 // Only add second level once, in case of multiple inferences
                 JsonChildObject childObject{ childEvent->GetName() };
                 childObject.SetType(JsonObjectType::Event);
                 parentObject.AddChild(childObject);
             }
  
             // It's possible that childIdx can overrun the parents' child vector. Check before we try to process a
             // non-existent child.
             if (childIdx < parentObject.NumChildren())
             {
                 // Recursively process children.
                 ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
                 childIdx++;
             }
         }
     }
 }

References JsonChildObject::AddChild(), JsonChildObject::AddMeasurement(), ARMNN_ASSERT, Event, JsonChildObject::GetChild(), Event::GetInstruments(), Event::GetMeasurements(), Event::GetProfilingGuid(), OptionalBase::has_value(), JsonChildObject::m_Children, Measurement, JsonChildObject::NumChildren(), JsonChildObject::SetGuid(), JsonChildObject::SetType(), JsonChildObject::SetUnit(), and OptionalReferenceSwitch< IsReference, T >::value().

Referenced by ProfilerImpl::Print().

◆ FakeQuantization()

void armnn::FakeQuantization	(	const float *	inputData,
		float *	outputData,
		uint32_t	numElements,
		float	min,
		float	max
	)

Definition at line 17 of file RefFakeQuantizationFloat32Workload.cpp.

 {
     float scale = (max - min) / 255.f;
     int32_t offset = armnn::numeric_cast<int32_t>((-min * 255.f) / (max - min));
  
     for (uint32_t i = 0; i < numElements; i++)
     {
         outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
     }
  
 }

◆ FalseFunc()

bool armnn::FalseFunc	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 62 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(reasonIfUnsupported);
     IgnoreUnused(params...);
     return false;
 }

References IgnoreUnused().

◆ FalseFuncF16()

bool armnn::FalseFuncF16	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 70 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncF32()

bool armnn::FalseFuncF32	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 78 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncI32()

bool armnn::FalseFuncI32	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 94 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with int32 data type");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncU8()

bool armnn::FalseFuncU8	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 86 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with 8-bit data type");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF16()

bool armnn::FalseInputFuncF16	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 110 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type input");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF32()

bool armnn::FalseInputFuncF32	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 102 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type input");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF16()

bool armnn::FalseOutputFuncF16	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 126 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type output");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF32()

bool armnn::FalseOutputFuncF32	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 118 of file LayerSupportCommon.hpp.

 {
     IgnoreUnused(params...);
     SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type output");
     return false;
 }

References IgnoreUnused(), and SetValueChecked().

◆ Fill()

void Fill	(	Encoder< float > &	output,
		const TensorShape &	desiredOutputShape,
		const float	value
	)

Creates a tensor and fills it with a scalar value.

Definition at line 13 of file Fill.cpp.

 {
     for(unsigned int i = 0; i < desiredOutputShape.GetNumElements(); ++i)
     {
         output[i];
         output.Set(value);
     }
 }

References TensorShape::GetNumElements(), and Encoder< IType >::Set().

◆ FindKernelMeasurements()

std::vector<Measurement> armnn::FindKernelMeasurements ( const Event * event )

Definition at line 62 of file Profiling.cpp.

 {
     ARMNN_ASSERT(event != nullptr);
  
     std::vector<Measurement> measurements;
  
     // Search through the measurements.
     for (const auto& measurement : event->GetMeasurements())
     {
         if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
             || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
         {
             // Measurement found.
             measurements.push_back(measurement);
         }
     }
  
     return measurements;
 }

References ARMNN_ASSERT, and Event::GetMeasurements().

◆ FindMeasurement()

Measurement armnn::FindMeasurement	(	const std::string &	name,
		const Event *	event
	)

Definition at line 43 of file Profiling.cpp.

 {
  
     ARMNN_ASSERT(event != nullptr);
  
     // Search though the measurements.
     for (const auto& measurement : event->GetMeasurements())
     {
         if (measurement.m_Name == name)
         {
             // Measurement found.
             return measurement;
         }
     }
  
     // Measurement not found.
     return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
 }

References ARMNN_ASSERT, and Event::GetMeasurements().

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults(), and ProfilerImpl::CalculateProfilingEventStats().

◆ FinishClContextBuffer()

void armnn::FinishClContextBuffer	(	flatbuffers::FlatBufferBuilder &	fbb,
		flatbuffers::Offset< armnn::ClContext >	root
	)

inline

Definition at line 171 of file ClContextSchema_generated.h.

                                             {
   // Calls fbb.Finish with `root` and the identifier returned by ClContextIdentifier().
   fbb.Finish(root, ClContextIdentifier());
 }

References ClContextIdentifier().

◆ FinishSizePrefixedClContextBuffer()

void armnn::FinishSizePrefixedClContextBuffer	(	flatbuffers::FlatBufferBuilder &	fbb,
		flatbuffers::Offset< armnn::ClContext >	root
	)

inline

Definition at line 177 of file ClContextSchema_generated.h.

                                             {
   // Calls fbb.FinishSizePrefixed with `root` and the identifier returned by ClContextIdentifier().
   fbb.FinishSizePrefixed(root, ClContextIdentifier());
 }

References ClContextIdentifier().

◆ FoldPadIntoAveragePool2d()

LayerType* armnn::FoldPadIntoAveragePool2d	(	OptimizationViews &	optimizationViews,
		Pooling2dLayer *	baseLayer,
		Pooling2dDescriptor &	poolDescriptor,
		PadLayer *	padLayer
	)

Definition at line 341 of file SubgraphUtils.hpp.

 {
     IConnectableLayer* replacement =
         optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d");
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FoldPadLayer(optimizationViews,
                  baseLayer,
                  replacementLayer,
                  padLayer);
  
     return replacementLayer;
 }

References INetwork::AddPooling2dLayer(), FoldPadLayer(), and OptimizationViews::GetINetwork().

◆ FoldPadLayer()

LayerType* armnn::FoldPadLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		LayerType *	replacementLayer,
		PadLayer *	padLayer
	)

Definition at line 234 of file SubgraphUtils.hpp.

 {
     SubgraphView substitutionSubgraph({padLayer, baseLayer},
                                       CreateIInputsFrom({padLayer}),
                                       CreateIOutputsFrom({baseLayer}));
     SubgraphView replacementSubgraph(replacementLayer);
  
     optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
  
     return replacementLayer;
 }

References OptimizationViews::AddSubstitution().

Referenced by FoldPadIntoAveragePool2d().

◆ ForEachLayerInput()

void armnn::ForEachLayerInput	(	LayerSelectionInfo::LayerInfoContainer &	layerInfos,
		LayerSelectionInfo &	layerInfo,
		Delegate	function
	)

Definition at line 267 of file SubgraphViewSelector.cpp.

 {
     Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
  
     for (auto inputSlot : layer.GetInputSlots())
     {
         auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
         ARMNN_ASSERT_MSG(connectedInput, "Dangling input slot detected.");
         Layer& inputLayer = connectedInput->GetOwningLayer();
  
         auto parentInfo = layerInfos.find(&inputLayer);
         if (parentInfo != layerInfos.end())
         {
             function(parentInfo->second);
         }
     }
 }

References ARMNN_ASSERT_MSG, and Layer::GetInputSlots().

Referenced by AssignSplitId(), and IsReadyForSplitAssignment().

◆ ForEachLayerOutput()

void armnn::ForEachLayerOutput	(	LayerSelectionInfo::LayerInfoContainer &	layerInfos,
		LayerSelectionInfo &	layerInfo,
		Delegate	function
	)

Definition at line 288 of file SubgraphViewSelector.cpp.

 {
     Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
  
     for (auto& outputSlot : layer.GetOutputSlots())
     {
         for (auto& output : outputSlot.GetConnections())
         {
             Layer& childLayer = output->GetOwningLayer();
  
             auto childInfo = layerInfos.find(&childLayer);
             if (childInfo != layerInfos.end())
             {
                 function(childInfo->second);
             }
         }
     }
 }

References Layer::GetOutputSlots().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ FullyConnected()

void FullyConnected	(	const TensorShape &	rInputShape,
		Decoder< float > &	rInputDecoder,
		const TensorShape &	rOutputShape,
		Encoder< float > &	rOutputEncoder,
		const TensorShape &	rWeightsShape,
		Decoder< float > &	rWeightDecoder,
		Decoder< float > *	pBiasDecoder,
		const bool	biasEnabled,
		const unsigned int	K,
		const bool	transposeWeights
	)

Performs a matrix multiplication and optionally adds a bias.

Definition at line 15 of file FullyConnected.cpp.

 {
     // Perform FullyConnected implementation
     unsigned int outputSize = rOutputShape[1];
  
     const std::vector<float> decodedInputs = rInputDecoder.DecodeTensor(rInputShape);
     const std::vector<float> decodedWeights = rWeightDecoder.DecodeTensor(rWeightsShape);
  
     const TensorShape biasShape{outputSize};
  
     ARMNN_ASSERT(!biasEnabled || pBiasDecoder != nullptr);
     const std::vector<float> decodedBiases = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
  
  
     for (unsigned int n = 0; n < rInputShape[0]; n++)
     {
         for (unsigned int channelOutput = 0; channelOutput < outputSize; channelOutput++)
         {
             float outval = 0.f;
  
             for (unsigned int channelInput = 0; channelInput < K; channelInput++)
             {
                 float weight;
                 if (transposeWeights)
                 {
                     weight = decodedWeights[channelOutput * K + channelInput];
                 }
                 else
                 {
                     weight = decodedWeights[channelInput * outputSize + channelOutput];
                 }
  
                 outval += weight * decodedInputs[n * K + channelInput];
             }
  
             if (biasEnabled)
             {
                 outval += decodedBiases[channelOutput];
             }
  
             rOutputEncoder[n * outputSize + channelOutput];
             rOutputEncoder.Set(outval);
         }
     }
 }

References ARMNN_ASSERT, Decoder< IType >::DecodeTensor(), and Encoder< IType >::Set().

◆ FuseAdditionLayer()

LayerType* armnn::FuseAdditionLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 74 of file ArmComputeSubgraphUtils.hpp.

 {
     ARMNN_NO_DEPRECATE_WARN_BEGIN
     IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddAdditionLayer(name.c_str());
     ARMNN_NO_DEPRECATE_WARN_END
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddAdditionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseBatchNormalizationLayer()

LayerType* armnn::FuseBatchNormalizationLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 179 of file ArmComputeSubgraphUtils.hpp.

 {
     IConnectableLayer* replacement =
         optimizationViews.GetINetwork()->AddBatchNormalizationLayer(baseLayer->GetParameters(),
                                                                     ConstTensor(),
                                                                     ConstTensor(),
                                                                     ConstTensor(),
                                                                     ConstTensor(),
                                                                     name.c_str());
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     SubgraphView substitutionSubgraph({baseLayer, activationLayer},
                                       CreateIInputsFrom({baseLayer}),
                                       CreateIOutputsFrom({activationLayer}));
     SubgraphView replacementSubgraph(replacementLayer);
  
     return replacementLayer;
 }

References INetwork::AddBatchNormalizationLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseConvolution2dLayer()

LayerType* armnn::FuseConvolution2dLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 209 of file ArmComputeSubgraphUtils.hpp.

 {
     IConnectableLayer* replacement = optimizationViews.GetINetwork()
                                                       ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
  
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDepthwiseConvolution2dLayer()

LayerType* armnn::FuseDepthwiseConvolution2dLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 231 of file ArmComputeSubgraphUtils.hpp.

 {
     IConnectableLayer* replacement =
         optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
  
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddDepthwiseConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDivisionLayer()

LayerType* armnn::FuseDivisionLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 116 of file ArmComputeSubgraphUtils.hpp.

 {
     ARMNN_NO_DEPRECATE_WARN_BEGIN
     IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddDivisionLayer(name.c_str());
     ARMNN_NO_DEPRECATE_WARN_END
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddDivisionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseElementwiseBinaryLayer()

LayerType* armnn::FuseElementwiseBinaryLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		BinaryOperation	operation,
		std::string	name
	)

Definition at line 158 of file ArmComputeSubgraphUtils.hpp.

 {
     IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddElementwiseBinaryLayer(operation,
                                                                                                 name.c_str());
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddElementwiseBinaryLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseFullyConnectedLayer()

LayerType* armnn::FuseFullyConnectedLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 253 of file ArmComputeSubgraphUtils.hpp.

 {
     IConnectableLayer* replacement =
         optimizationViews.GetINetwork()->AddFullyConnectedLayer(baseLayer->GetParameters(),
                                                                 name.c_str());
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
  
     return replacementLayer;
 }

References INetwork::AddFullyConnectedLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseLayer()

LayerType* armnn::FuseLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		LayerType *	replacementLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc
	)

Definition at line 54 of file ArmComputeSubgraphUtils.hpp.

 {
     replacementLayer->SetAdditionalInfoForObject(
         std::make_shared<ActivationDescriptor>(activationDesc));
  
     SubgraphView substitutionSubgraph({baseLayer, activationLayer},
                                       CreateIInputsFrom({baseLayer}),
                                       CreateIOutputsFrom({activationLayer}));
     SubgraphView replacementSubgraph(replacementLayer);
  
     optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
  
     return replacementLayer;
 }

References OptimizationViews::AddSubstitution().

Referenced by FuseAdditionLayer(), FuseBatchNormalizationLayer(), FuseConvolution2dLayer(), FuseDepthwiseConvolution2dLayer(), FuseDivisionLayer(), FuseElementwiseBinaryLayer(), FuseFullyConnectedLayer(), FuseMultiplicationLayer(), and FuseSubtractionLayer().

◆ FuseMultiplicationLayer()

LayerType* armnn::FuseMultiplicationLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 137 of file ArmComputeSubgraphUtils.hpp.

 {
     ARMNN_NO_DEPRECATE_WARN_BEGIN
     IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddMultiplicationLayer(name.c_str());
     ARMNN_NO_DEPRECATE_WARN_END
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddMultiplicationLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseSubtractionLayer()

LayerType* armnn::FuseSubtractionLayer	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		ActivationLayer *	activationLayer,
		ActivationDescriptor &	activationDesc,
		std::string	name
	)

Definition at line 95 of file ArmComputeSubgraphUtils.hpp.

 {
     ARMNN_NO_DEPRECATE_WARN_BEGIN
     IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddSubtractionLayer(name.c_str());
     ARMNN_NO_DEPRECATE_WARN_END
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
  
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
               activationLayer,
               activationDesc);
  
     return replacementLayer;
 }

References INetwork::AddSubtractionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ Gather()

void Gather	(	const TensorInfo &	paramsInfo,
		const TensorInfo &	indicesInfo,
		const TensorInfo &	outputInfo,
		Decoder< float > &	params,
		const int32_t *	indices,
		Encoder< float > &	output,
		const int32_t	axis_int
	)

Definition at line 15 of file Gather.cpp.

 {
     IgnoreUnused(outputInfo);
  
     const int paramsRank = static_cast<int>(paramsInfo.GetNumDimensions());
     if((axis_int < -1 * paramsRank) || (paramsRank <= axis_int))
     {
         throw InvalidArgumentException((fmt::format("Gather: Axis {} is not within [-{}, {}) range",
                                                     axis_int, paramsRank, paramsRank)));
     }
     const unsigned int axis = (axis_int < 0) ? static_cast<unsigned int>(paramsRank + axis_int)
                                              : static_cast<unsigned int>(axis_int);
  
     const TensorShape& paramsShape = paramsInfo.GetShape();
  
     // Product of all dimensions to the left side of the axis
     unsigned int paramsOuterProduct = 1;
     for (unsigned int i = 0; i < axis; ++i)
     {
         paramsOuterProduct *= paramsShape[i];
     }
     // Product of all dimensions to the right side of the axis
     unsigned int paramsInnerProduct = 1;
     for (unsigned int k = 1 + axis; k < paramsInfo.GetNumDimensions(); ++k)
     {
         paramsInnerProduct *= paramsShape[k];
     }
  
     unsigned int offset = 0;
     unsigned int outIndex = 0;
     for (unsigned int i = 0; i < paramsOuterProduct; ++i)
     {
         for (unsigned int j = 0; j < indicesInfo.GetNumElements(); ++j)
         {
             unsigned int index =
                 (indices[j] < 0) ? static_cast<unsigned int>(static_cast<int>(paramsShape[axis]) + indices[j])
                                  : static_cast<unsigned int>(indices[j]);
  
             if (index >= paramsShape[axis])
             {
                 throw InvalidArgumentException((fmt::format("Gather: index >= paramsShape[axis]: {} >= {}",
                                                             index, paramsShape[axis] )));
             }
  
             unsigned int startOffset = (paramsInnerProduct * index) + offset;
             unsigned int endOffset = startOffset + paramsInnerProduct;
  
             for (unsigned int k = startOffset; k < endOffset; ++k)
             {
                 params[k];
                 float outputValue = params.Get();
                 output[outIndex];
                 output.Set(outputValue);
                 ++outIndex;
             }
         }
         offset += paramsShape[axis] * paramsInnerProduct;
     }
  
     if (outIndex != outputInfo.GetNumElements())
     {
         throw InvalidArgumentException((fmt::format("Gather: Invalid outIndex {} ", outIndex)));
     }
 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), and Encoder< IType >::Set().

◆ GatherTensorHandlePairs()

void armnn::GatherTensorHandlePairs	(	const DescriptorType &	descriptor,
		std::vector< std::pair< SrcTensorHandleType , DstTensorHandleType >> &	tensorHandlePairs
	)

Definition at line 204 of file WorkloadUtils.hpp.

 {
     const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
     tensorHandlePairs.reserve(numInputs);
  
     for (unsigned int i = 0; i < numInputs; ++i)
     {
         SrcTensorHandleType* const srcTensorHandle =
             PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
         DstTensorHandleType* const dstTensorHandle =
             PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
  
         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
     }
 }

Referenced by CopyMemGenericWorkload::CopyMemGenericWorkload(), CopyMemGenericWorkload::ExecuteAsync(), NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(), and NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload().

◆ GeneratePermutationVectorOnLastTwoDimensions()

armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions ( unsigned int rank )

Generates a permutation vector of size rank that swaps the two rightmost dimensions.

Parameters

rank	- Tensor rank, i.e. number of dimensions in the tensors

Returns: - A permutation vector that permutes the 2 last dimensions

Definition at line 356 of file WorkloadUtils.cpp.

 {
     armnn::PermutationVector permutationVector{};
     switch (rank)
     {
         case 2:
             permutationVector = {1U, 0U};
             break;
         case 3:
             permutationVector = {0U, 2U, 1U};
             break;
         case 4:
             permutationVector = {0U, 1U, 3U, 2U};
             break;
         default:
             throw Exception("Invalid number of dimensions.");
     }
     return permutationVector;
 }

◆ GenerateRangeK()

std::vector<unsigned int> armnn::GenerateRangeK ( unsigned int k )

Definition at line 18 of file DetectionPostProcess.cpp.

 {
     std::vector<unsigned int> range(k);
     std::iota(range.begin(), range.end(), 0);
     return range;
 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().

◆ GetActivationFunctionAsCString()

constexpr char const* armnn::GetActivationFunctionAsCString ( ActivationFunction activation )

constexpr

Definition at line 31 of file TypesUtils.hpp.

 {
     switch (activation)
     {
         case ActivationFunction::Sigmoid:       return "Sigmoid";
         case ActivationFunction::TanH:          return "TanH";
         case ActivationFunction::Linear:        return "Linear";
         case ActivationFunction::ReLu:          return "ReLu";
         case ActivationFunction::BoundedReLu:   return "BoundedReLu";
         case ActivationFunction::SoftReLu:      return "SoftReLu";
         case ActivationFunction::LeakyReLu:     return "LeakyReLu";
         case ActivationFunction::Abs:           return "Abs";
         case ActivationFunction::Sqrt:          return "Sqrt";
         case ActivationFunction::Square:        return "Square";
         case ActivationFunction::Elu:           return "Elu";
         case ActivationFunction::HardSwish:     return "HardSwish";
         case ActivationFunction::Gelu:          return "Gelu";
         default:                                return "Unknown";
     }
 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by StringifyLayerParameters< ActivationDescriptor >::Serialize().

◆ GetArgMinMaxFunctionAsCString()

constexpr char const* armnn::GetArgMinMaxFunctionAsCString ( ArgMinMaxFunction function )

constexpr

Definition at line 52 of file TypesUtils.hpp.

 {
     switch (function)
     {
         case ArgMinMaxFunction::Max:    return "Max";
         case ArgMinMaxFunction::Min:    return "Min";
         default:                        return "Unknown";
     }
 }

References Max, and Min.

◆ GetBiasDataType()

DataType GetBiasDataType ( DataType inputDataType )

Definition at line 28 of file WorkloadData.cpp.

 {
     switch (inputDataType)
     {
         case DataType::Float16:
             return DataType::Float16;
         case DataType::BFloat16:
         case DataType::Float32:
             return DataType::Float32;
         case DataType::QAsymmS8:
         case DataType::QAsymmU8:
         case DataType::QSymmS8:
         case DataType::QSymmS16:
             return DataType::Signed32;
         default:
             ARMNN_ASSERT_MSG(false, "Invalid input data type");
             return DataType::Float32;
     }
 }

References ARMNN_ASSERT_MSG, and BFloat16.

Referenced by FullyConnectedQueueDescriptor::Validate(), Convolution2dQueueDescriptor::Validate(), Convolution3dQueueDescriptor::Validate(), DepthwiseConvolution2dQueueDescriptor::Validate(), and TransposeConvolution2dQueueDescriptor::Validate().

◆ GetBiasTypeFromWeightsType()

armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType ( armnn::Optional< armnn::DataType > weightsType )

inline

Definition at line 14 of file LayerSupportRules.hpp.

 {
     if (!weightsType)
     {
         return weightsType;
     }
  
     switch(weightsType.value())
     {
         case armnn::DataType::Float16:
         case armnn::DataType::Float32:
             return weightsType;
         case armnn::DataType::QAsymmS8:
         case armnn::DataType::QAsymmU8:
         case armnn::DataType::QSymmS8:
         case armnn::DataType::QSymmS16:
             return armnn::DataType::Signed32;
         default:
             ARMNN_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
     }
     return armnn::EmptyOptional();
 }

References ARMNN_ASSERT_MSG, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by BiasAndWeightsTypesMatch::BiasAndWeightsTypesMatch().

◆ GetBinaryOperationAsCString()

constexpr char const* armnn::GetBinaryOperationAsCString ( BinaryOperation operation )

constexpr

Definition at line 76 of file TypesUtils.hpp.

 {
     switch (operation)
     {
         case BinaryOperation::Add:      return "Add";
         case BinaryOperation::Div:      return "Div";
         case BinaryOperation::Maximum:  return "Maximum";
         case BinaryOperation::Minimum:  return "Minimum";
         case BinaryOperation::Mul:      return "Mul";
         case BinaryOperation::Power:    return "Power";
         case BinaryOperation::SqDiff:   return "SqDiff";
         case BinaryOperation::Sub:      return "Sub";
         default:                        return "Unknown";
     }
 }

References Add, Div, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

Referenced by ExecuteFunction(), and StringifyLayerParameters< ElementwiseBinaryDescriptor >::Serialize().

◆ GetCapability() [1/2]

Optional< const BackendOptions::BackendOption > GetCapability	(	const std::string &	backendCapabilityName,
		const armnn::BackendId &	backend
	)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise, returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 51 of file BackendHelper.cpp.

 {
     auto const& backendRegistry = armnn::BackendRegistryInstance();
     if (backendRegistry.IsBackendRegistered(backend))
     {
         auto factoryFunc = backendRegistry.GetFactory(backend);
         auto backendObject = factoryFunc();
         auto capabilities = backendObject->GetCapabilities();
         return GetCapability(backendCapabilityName, capabilities);
     }
     return EmptyOptional();
 }

References BackendRegistryInstance(), and GetCapability().

◆ GetCapability() [2/2]

Optional< const BackendOptions::BackendOption > GetCapability	(	const std::string &	backendCapabilityName,
		const BackendCapabilities &	capabilities
	)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise, returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 37 of file BackendHelper.cpp.

 {
     for (size_t i=0; i < capabilities.GetOptionCount(); i++)
     {
         const auto& capability = capabilities.GetOption(i);
         if (backendCapabilityName == capability.GetName())
         {
             return capability;
         }
     }
     return EmptyOptional();
 }

References BackendOptions::GetOption(), and BackendOptions::GetOptionCount().

Referenced by GetCapability(), HasCapability(), LayerSupportHandle::IsConvolution2dSupported(), LayerSupportHandle::IsDepthwiseConvolutionSupported(), LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(), and LayerSupportHandle::IsFullyConnectedSupported().

◆ GetClContext()

const armnn::ClContext* armnn::GetClContext ( const void * buf )

inline

Definition at line 140 of file ClContextSchema_generated.h.

                                                            {
   // Returns the result of flatbuffers::GetRoot<armnn::ClContext> applied to buf.
   return flatbuffers::GetRoot<armnn::ClContext>(buf);
 }

Referenced by ClContextDeserializer::DeserializeFromBinary().

◆ GetComparisonOperationAsCString()

constexpr char const* armnn::GetComparisonOperationAsCString ( ComparisonOperation operation )

constexpr

Definition at line 62 of file TypesUtils.hpp.

 {
     switch (operation)
     {
         case ComparisonOperation::Equal:          return "Equal";
         case ComparisonOperation::Greater:        return "Greater";
         case ComparisonOperation::GreaterOrEqual: return "GreaterOrEqual";
         case ComparisonOperation::Less:           return "Less";
         case ComparisonOperation::LessOrEqual:    return "LessOrEqual";
         case ComparisonOperation::NotEqual:       return "NotEqual";
         default:                                  return "Unknown";
     }
 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, and NotEqual.

Referenced by StringifyLayerParameters< ComparisonDescriptor >::Serialize().

◆ GetComputeDeviceAsCString()

constexpr char const* armnn::GetComputeDeviceAsCString ( Compute compute )

constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 34 of file BackendId.hpp.

 {
     switch (compute)
     {
         case armnn::Compute::CpuRef: return "CpuRef";
         case armnn::Compute::CpuAcc: return "CpuAcc";
         case armnn::Compute::GpuAcc: return "GpuAcc";
         default:                     return "Unknown";
     }
 }

References CpuAcc, CpuRef, and GpuAcc.

Referenced by BackendId::BackendId(), BackendId::IsCpuAcc(), BackendId::IsCpuRef(), BackendId::IsGpuAcc(), BackendId::IsUndefined(), and operator<<().

◆ GetConvolutionMethodString()

std::string GetConvolutionMethodString ( arm_compute::ConvolutionMethod & convolutionMethod )

inline

Definition at line 46 of file ClWorkloadUtils.hpp.

 {
     switch (convolutionMethod)
     {
         case arm_compute::ConvolutionMethod::FFT:
             return "FFT";
         case arm_compute::ConvolutionMethod::DIRECT:
             return "Direct";
         case arm_compute::ConvolutionMethod::GEMM:
             return "GEMM";
         case arm_compute::ConvolutionMethod::WINOGRAD:
             return "Winograd";
         default:
             return "Unknown";
     }
 }

◆ GetDataLayoutName()

constexpr const char* armnn::GetDataLayoutName ( DataLayout dataLayout )

constexpr

Definition at line 253 of file TypesUtils.hpp.

 {
     switch (dataLayout)
     {
         case DataLayout::NCHW:  return "NCHW";
         case DataLayout::NHWC:  return "NHWC";
         case DataLayout::NDHWC: return "NDHWC";
         case DataLayout::NCDHW: return "NCDHW";
         default:                return "Unknown";
     }
 }

References NCDHW, NCHW, NDHWC, and NHWC.

◆ GetDataTypeName()

constexpr const char* armnn::GetDataTypeName ( DataType dataType )

constexpr

Definition at line 233 of file TypesUtils.hpp.

 {
     switch (dataType)
     {
         case DataType::Float16:               return "Float16";
         case DataType::Float32:               return "Float32";
         case DataType::Signed64:              return "Signed64";
         case DataType::QAsymmU8:              return "QAsymmU8";
         case DataType::QAsymmS8:              return "QAsymmS8";
         case DataType::QSymmS8:               return "QSymmS8";
         case DataType::QSymmS16:              return "QSymm16";
         case DataType::Signed32:              return "Signed32";
         case DataType::Boolean:               return "Boolean";
         case DataType::BFloat16:              return "BFloat16";
  
         default:
             return "Unknown";
     }
 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by AttemptBackendAssignment(), RefDebugWorkload< DataType >::GetName(), armnnUtils::GetPerAxisParams(), ConstantLayer::SerializeLayerParameters(), armnnUtils::ToFloatArray(), and VerifyTensorInfoDataType().

◆ GetDataTypeSize()

constexpr unsigned int armnn::GetDataTypeSize ( DataType dataType )

constexpr

Definition at line 182 of file TypesUtils.hpp.

 {
     switch (dataType)
     {
         case DataType::BFloat16:
         case DataType::Float16:               return 2U;
         case DataType::Float32:
         case DataType::Signed32:              return 4U;
         case DataType::Signed64:              return 8U;
         case DataType::QAsymmU8:              return 1U;
         case DataType::QAsymmS8:              return 1U;
         case DataType::QSymmS8:               return 1U;
         case DataType::QSymmS16:              return 2U;
         case DataType::Boolean:               return 1U;
         default:                              return 0U;
     }
 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by TosaRefTensorHandle::CanBeImported(), RefTensorHandle::CanBeImported(), TensorInfo::GetNumBytes(), GetUnpaddedTensorStrides(), PermuteTensor(), and armnn_driver::SwizzleAndroidNn4dTensorToArmNn().

◆ GetEventPtr() [1/2]

const Event* armnn::GetEventPtr ( const Event * ptr )

Definition at line 109 of file Profiling.cpp.

 { return ptr; }

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ GetEventPtr() [2/2]

const Event* armnn::GetEventPtr ( const std::unique_ptr< Event > & ptr )

Definition at line 110 of file Profiling.cpp.

 { return ptr.get(); }

◆ GetFusedName()

void armnn::GetFusedName	(	Layer *	layerList[4],
		std::string &	fusedName
	)

inline

Definition at line 71 of file NeonBackendOptimizationUtils.hpp.

 {
     // Build the fused name string
     fusedName = "fused";
     for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
     {
         if (! layerList[layerIdx])
         {
             break;
         }
         fusedName += "-";
         fusedName += layerList[layerIdx]->GetNameStr();
     }
 }

References Layer::GetNameStr().

Referenced by NeonBackend::OptimizeSubgraphView().

◆ GetFusedTypeAsCString()

constexpr char const* armnn::GetFusedTypeAsCString ( FusedKernelType type )

constexpr

Definition at line 119 of file TypesUtils.hpp.

 {
     switch (type)
     {
         case FusedKernelType::AddMulAdd:   return "AddMulAdd";
         default:                           return "Unknown";
     }
 }

References AddMulAdd.

Referenced by StringifyLayerParameters< FusedDescriptor >::Serialize().

◆ GetGraphForTesting()

Graph & GetGraphForTesting ( IOptimizedNetwork * optNet )

Definition at line 49 of file TestUtils.cpp.

 {
     return optNet->pOptimizedNetworkImpl->GetGraph();
 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetILayerSupportByBackendId()

LayerSupportHandle GetILayerSupportByBackendId ( const armnn::BackendId & backend )

Convenience function to retrieve the ILayerSupportHandle for a backend.

Definition at line 23 of file BackendHelper.cpp.

 {
     BackendRegistry& backendRegistry = armnn::BackendRegistryInstance();
  
     if (!backendRegistry.IsBackendRegistered(backend))
     {
         return LayerSupportHandle(nullptr);
     }
  
     auto factoryFunc = backendRegistry.GetFactory(backend);
     auto backendObject = factoryFunc();
     return LayerSupportHandle(backendObject->GetLayerSupport(), backend);
 }

References BackendRegistryInstance(), BackendRegistry::GetFactory(), and BackendRegistry::IsBackendRegistered().

◆ GetInputTensor()

const armnn::ConstTensor armnn::GetInputTensor	(	const LayerBindingId	layerId,
		const InputTensors &	inputTensors
	)

Definition at line 1404 of file LoadedNetwork.cpp.

 {
     for (auto inputTensorPair : inputTensors)
     {
         LayerBindingId id = inputTensorPair.first;
         if (id == layerId)
         {
             return inputTensorPair.second;
         }
     }
     throw InvalidArgumentException("Input does not exist.");
 }

◆ GetInputTensorData()

const DataType* armnn::GetInputTensorData	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 42 of file RefWorkloadUtils.hpp.

 {
     const ITensorHandle* tensorHandle = data.m_Inputs[idx];
     return reinterpret_cast<const DataType*>(tensorHandle->Map());
 }

References ITensorHandle::Map().

◆ GetInputTensorDataBFloat16()

const BFloat16* armnn::GetInputTensorDataBFloat16	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 86 of file RefWorkloadUtils.hpp.

 {
     return GetInputTensorData<BFloat16>(idx, data);
 }

◆ GetInputTensorDataFloat()

const float* armnn::GetInputTensorDataFloat	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 62 of file RefWorkloadUtils.hpp.

 {
     return GetInputTensorData<float>(idx, data);
 }

◆ GetInputTensorDataHalf()

const Half* armnn::GetInputTensorDataHalf	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 74 of file RefWorkloadUtils.hpp.

 {
     return GetInputTensorData<Half>(idx, data);
 }

◆ GetLayerInOutDatatype()

std::vector<DataType> armnn::GetLayerInOutDatatype ( const Layer * layer )

inline

Definition at line 1020 of file Network.cpp.

 {
     DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
                            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
     DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
                            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
     return {dataTypeIn, dataTypeOut};
 }

References Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetNumInputSlots(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), and OutputSlot::GetTensorInfo().

Referenced by AssignBackends(), and AssignBackendsIConnectable().

◆ GetLayerTypeAsCString()

char const * GetLayerTypeAsCString ( LayerType type )

Definition at line 13 of file InternalTypes.cpp.

 {
     switch (type)
     {
 #define X(name) case LayerType::name: return #name;
       LIST_OF_LAYER_TYPE
 #undef X
         default:
             ARMNN_ASSERT_MSG(false, "Unknown layer type");
             return "Unknown";
     }
 }

References ARMNN_ASSERT_MSG, and LIST_OF_LAYER_TYPE.

Referenced by AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Connect(), StrategyBase< DefaultStrategy >::ExecuteStrategy(), Layer::InferOutputShapes(), Graph::InferTensorInfos(), Graph::Print(), ReturnWithError(), Layer::SerializeLayerParameters(), Graph::SerializeToDot(), ElementwiseBaseLayer::ValidateTensorShapesFromInputs(), ElementwiseBinaryLayer::ValidateTensorShapesFromInputs(), ElementwiseUnaryLayer::ValidateTensorShapesFromInputs(), Graph::VerifyConstantLayerSetTensorInfo(), and Layer::VerifyLayerConnections().

◆ GetLogicalBinaryOperationAsCString()

constexpr char const* armnn::GetLogicalBinaryOperationAsCString ( LogicalBinaryOperation operation )

constexpr

Definition at line 109 of file TypesUtils.hpp.

 {
     switch (operation)
     {
         case LogicalBinaryOperation::LogicalAnd: return "LogicalAnd";
         case LogicalBinaryOperation::LogicalOr:  return "LogicalOr";
         default:                                 return "Unknown";
     }
 }

References LogicalAnd, and LogicalOr.

◆ GetMemBlockStrategyTypeName()

constexpr const char* armnn::GetMemBlockStrategyTypeName ( MemBlockStrategyType memBlockStrategyType )

constexpr

Definition at line 295 of file TypesUtils.hpp.

 {
     switch (memBlockStrategyType)
     {
         case MemBlockStrategyType::SingleAxisPacking: return "SingleAxisPacking";
         case MemBlockStrategyType::MultiAxisPacking:  return "MultiAxisPacking";
         default:                                      return "Unknown";
     }
 }

References MultiAxisPacking, and SingleAxisPacking.

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategy()

std::unique_ptr<IMemoryOptimizerStrategy> armnn::GetMemoryOptimizerStrategy ( const std::string & strategyName )

inline

Definition at line 36 of file MemoryOptimizerStrategyLibrary.hpp.

 {
      const auto& strategyFactoryMap = GetStrategyFactories();
      auto strategyFactory = strategyFactoryMap.find(strategyName);
      if (strategyFactory != GetStrategyFactories().end())
      {
          return  strategyFactory->second->CreateMemoryOptimizerStrategy();
      }
     return nullptr;
 }

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategyNames()

const std::vector<std::string> armnn::GetMemoryOptimizerStrategyNames ( )

inline

Definition at line 47 of file MemoryOptimizerStrategyLibrary.hpp.

 {
     const auto& strategyFactoryMap = GetStrategyFactories();
     std::vector<std::string> strategyNames;
     for (const auto& strategyFactory : strategyFactoryMap)
     {
         strategyNames.emplace_back(strategyFactory.first);
     }
     return strategyNames;
 }

◆ GetModelOptionsForTesting()

ModelOptions & GetModelOptionsForTesting ( IOptimizedNetwork * optNet )

Definition at line 54 of file TestUtils.cpp.

 {
     return optNet->pOptimizedNetworkImpl->GetModelOptions();
 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetNormalizationAlgorithmChannelAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmChannelAsCString ( NormalizationAlgorithmChannel channel )

constexpr

Definition at line 265 of file TypesUtils.hpp.

 {
     switch (channel)
     {
         case NormalizationAlgorithmChannel::Across: return "Across";
         case NormalizationAlgorithmChannel::Within: return "Within";
         default:                                    return "Unknown";
     }
 }

References Across, and Within.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNormalizationAlgorithmMethodAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmMethodAsCString ( NormalizationAlgorithmMethod method )

constexpr

Definition at line 275 of file TypesUtils.hpp.

 {
     switch (method)
     {
         case NormalizationAlgorithmMethod::LocalBrightness: return "LocalBrightness";
         case NormalizationAlgorithmMethod::LocalContrast:   return "LocalContrast";
         default:                                            return "Unknown";
     }
 }

References LocalBrightness, and LocalContrast.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNumActivations()

unsigned int armnn::GetNumActivations ( const TensorInfo & inputInfo )

Definition at line 16 of file RefFullyConnectedWorkload.cpp.

 {
     unsigned int numActivations = 1; // Total number of activations in the input.
     for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
     {
         numActivations *= inputInfo.GetShape()[i];
     }
     return numActivations;
 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

◆ GetNumberOfCacheFiles()

unsigned int GetNumberOfCacheFiles ( const armnn::BackendId & backend )

Returns the number of cached files if backend supports caching.

Definition at line 130 of file BackendHelper.cpp.

 {
     auto const& backendRegistry = armnn::BackendRegistryInstance();
     if (backendRegistry.IsBackendRegistered(backend))
     {
         auto factoryFunc = backendRegistry.GetFactory(backend);
         auto backendObject = factoryFunc();
         return backendObject->GetNumberOfCacheFiles();
     }
     return 0;
 }

References BackendRegistryInstance().

Referenced by ArmnnDriver::getNumberOfCacheFilesNeeded(), ArmnnDriverImpl::PrepareArmnnModel(), and ArmnnDriverImpl::PrepareArmnnModelFromCache().

◆ GetNumInputs()

uint32_t armnn::GetNumInputs ( bool biasEnabled )

Definition at line 454 of file Descriptors.cpp.

 {
     unsigned int numInputs = 2;
     if (biasEnabled)
     {
         numInputs = 3;
     }
     return numInputs;
 }

Referenced by FullyConnectedDescriptor::GetNumInputs(), Convolution2dDescriptor::GetNumInputs(), Convolution3dDescriptor::GetNumInputs(), and DepthwiseConvolution2dDescriptor::GetNumInputs().

◆ GetOffset()

unsigned int armnn::GetOffset	(	const TensorShape &	shape,
		unsigned int	b,
		unsigned int	h,
		unsigned int	w,
		unsigned int	c,
		const DataLayoutIndexed &	dataLayout
	)

Definition at line 15 of file SpaceToBatchNd.cpp.

 {
     // 3D Tensors
     unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
     if (shape.GetNumDimensions() == 3)
     {
         return (b * shape[dataLayout.GetHeightIndex()] + h) * shape[channelDimension3D] + c;
     }
     // 4D Tensors
     else if (shape.GetNumDimensions() == 4)
     {
         if (dataLayout.GetDataLayout() == DataLayout::NHWC)
         {
             return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
                    shape[dataLayout.GetChannelsIndex()] + c;
         }
         else
         {
             return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
                    shape[dataLayout.GetWidthIndex()] + w;
         }
     }
     else
     {
         throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
     }
 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by SpaceToBatchNd(), and SpaceToDepth().

◆ GetOutputShapeRoundingAsCString()

constexpr char const* armnn::GetOutputShapeRoundingAsCString ( OutputShapeRounding rounding )

constexpr

Definition at line 139 of file TypesUtils.hpp.

 {
     switch (rounding)
     {
         case OutputShapeRounding::Ceiling:  return "Ceiling";
         case OutputShapeRounding::Floor:    return "Floor";
         default:                            return "Unknown";
     }
 }

References Ceiling, and Floor.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetOutputTensor()

const armnn::Tensor armnn::GetOutputTensor	(	const LayerBindingId	layerId,
		const OutputTensors &	outputTensors
	)

Definition at line 1417 of file LoadedNetwork.cpp.

 {
     for (auto outputTensorPair : outputTensors)
     {
         LayerBindingId id = outputTensorPair.first;
         if (id == layerId)
         {
             return outputTensorPair.second;
         }
     }
     throw InvalidArgumentException("Output does not exist.");
 }

◆ GetOutputTensorData() [1/2]

DataType* armnn::GetOutputTensorData ( ITensorHandle * tensorHandle )

Definition at line 56 of file RefWorkloadUtils.hpp.

 {
     return reinterpret_cast<DataType*>(tensorHandle->Map());
 }

References ITensorHandle::Map().

◆ GetOutputTensorData() [2/2]

DataType * GetOutputTensorData	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 181 of file ClWorkloadUtils.hpp.

 {
     ITensorHandle* tensorHandle = data.m_Outputs[idx];
     return reinterpret_cast<DataType*>(tensorHandle->Map());
 }

References ITensorHandle::Map().

◆ GetOutputTensorDataBFloat16()

BFloat16* armnn::GetOutputTensorDataBFloat16	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 92 of file RefWorkloadUtils.hpp.

 {
     return GetOutputTensorData<BFloat16>(idx, data);
 }

◆ GetOutputTensorDataFloat()

float* armnn::GetOutputTensorDataFloat	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 68 of file RefWorkloadUtils.hpp.

 {
     return GetOutputTensorData<float>(idx, data);
 }

◆ GetOutputTensorDataHalf()

Half* armnn::GetOutputTensorDataHalf	(	unsigned int	idx,
		const PayloadType &	data
	)

Definition at line 80 of file RefWorkloadUtils.hpp.

 {
     return GetOutputTensorData<Half>(idx, data);
 }

◆ GetPaddingMethodAsCString()

constexpr char const* armnn::GetPaddingMethodAsCString ( PaddingMethod method )

constexpr

Definition at line 149 of file TypesUtils.hpp.

 {
     switch (method)
     {
         case PaddingMethod::Exclude:       return "Exclude";
         case PaddingMethod::IgnoreValue:   return "IgnoreValue";
         default:                           return "Unknown";
     }
 }

References Exclude, and IgnoreValue.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetPaddingModeAsCString()

constexpr char const* armnn::GetPaddingModeAsCString ( PaddingMode mode )

constexpr

Definition at line 159 of file TypesUtils.hpp.

 {
     switch (mode)
     {
         case PaddingMode::Constant:   return "Exclude";
         case PaddingMode::Symmetric:  return "Symmetric";
         case PaddingMode::Reflect:    return "Reflect";
         default:                      return "Unknown";
     }
 }

References Constant, Reflect, and Symmetric.

Referenced by StringifyLayerParameters< PadDescriptor >::Serialize().

◆ GetPoolingAlgorithmAsCString()

constexpr char const* armnn::GetPoolingAlgorithmAsCString ( PoolingAlgorithm pooling )

constexpr

Definition at line 128 of file TypesUtils.hpp.

 {
     switch (pooling)
     {
         case PoolingAlgorithm::Average:  return "Average";
         case PoolingAlgorithm::Max:      return "Max";
         case PoolingAlgorithm::L2:       return "L2";
         default:                         return "Unknown";
     }
 }

References Average, L2, and Max.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetProfilingService()

arm::pipe::IProfilingService & GetProfilingService ( armnn::RuntimeImpl * runtime )

Definition at line 59 of file TestUtils.cpp.

 {
     return *(runtime->m_ProfilingService.get());
 }

◆ GetReduceOperationAsCString()

constexpr char const* armnn::GetReduceOperationAsCString ( ReduceOperation reduce_operation )

constexpr

Definition at line 170 of file TypesUtils.hpp.

 {
     switch (reduce_operation)
     {
         case ReduceOperation::Sum:  return "Sum";
         case ReduceOperation::Max:  return "Max";
         case ReduceOperation::Mean: return "Mean";
         case ReduceOperation::Min:  return "Min";
         case ReduceOperation::Prod: return "Prod";
         default:                    return "Unknown";
     }
 }

References Max, Mean, Min, Prod, and Sum.

Referenced by StringifyLayerParameters< ReduceDescriptor >::Serialize().

◆ GetResizeMethodAsCString()

constexpr const char* armnn::GetResizeMethodAsCString ( ResizeMethod method )

constexpr

Definition at line 285 of file TypesUtils.hpp.

 {
     switch (method)
     {
         case ResizeMethod::Bilinear:        return "Bilinear";
         case ResizeMethod::NearestNeighbor: return "NearestNeighbour";
         default:                            return "Unknown";
     }
 }

References Bilinear, and NearestNeighbor.

Referenced by StringifyLayerParameters< ResizeDescriptor >::Serialize().

◆ GetSizePrefixedClContext()

const armnn::ClContext* armnn::GetSizePrefixedClContext ( const void * buf )

inline

Definition at line 144 of file ClContextSchema_generated.h.

 {
   return flatbuffers::GetSizePrefixedRoot<armnn::ClContext>(buf);
 }

◆ GetStatusAsCString()

constexpr char const* armnn::GetStatusAsCString ( Status status )

constexpr

Definition at line 21 of file TypesUtils.hpp.

 {
     switch (status)
     {
         case armnn::Status::Success: return "Status::Success";
         case armnn::Status::Failure: return "Status::Failure";
         default:                     return "Unknown";
     }
 }

References Failure, and Success.

Referenced by operator<<().

◆ GetTensorInfo()

const TensorInfo& armnn::GetTensorInfo ( const ITensorHandle * tensorHandle )

inline

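float32 helpers. Returns the TensorInfo of the given tensor handle, after downcasting the handle to TensorHandleType.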

Definition at line 33 of file RefWorkloadUtils.hpp.

 {
     // We know that reference workloads use RefTensorHandles for inputs and outputs
     const TensorHandleType* refTensorHandle =
         PolymorphicDowncast<const TensorHandleType*>(tensorHandle);
     return refTensorHandle->GetTensorInfo();
 }

Referenced by BatchNormImpl(), Concatenate(), ExecuteFunction(), Split(), Splitter(), FillLayer::ValidateTensorShapesFromInputs(), SwitchLayer::ValidateTensorShapesFromInputs(), ConstantLayer::ValidateTensorShapesFromInputs(), DetectionPostProcessLayer::ValidateTensorShapesFromInputs(), SplitterLayer::ValidateTensorShapesFromInputs(), LstmLayer::ValidateTensorShapesFromInputs(), QuantizedLstmLayer::ValidateTensorShapesFromInputs(), and QLstmLayer::ValidateTensorShapesFromInputs().

◆ GetTimeDuration()

std::chrono::duration<double, std::milli> armnn::GetTimeDuration ( std::chrono::high_resolution_clock::time_point start_time )

inline

Definition at line 19 of file Timer.hpp.

 {
     return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
 }

References GetTimeNow().

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), and RuntimeImpl::~RuntimeImpl().

◆ GetTimeNow()

std::chrono::high_resolution_clock::time_point armnn::GetTimeNow ( )

inline

Definition at line 14 of file Timer.hpp.

 {
     return std::chrono::high_resolution_clock::now();
 }

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), GetTimeDuration(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().

◆ GetUnaryOperationAsCString()

constexpr char const* armnn::GetUnaryOperationAsCString ( UnaryOperation operation )

constexpr

Definition at line 92 of file TypesUtils.hpp.

 {
     switch (operation)
     {
         case UnaryOperation::Abs:        return "Abs";
         case UnaryOperation::Ceil:       return "Ceil";
         case UnaryOperation::Exp:        return "Exp";
         case UnaryOperation::Sqrt:       return "Sqrt";
         case UnaryOperation::Rsqrt:      return "Rsqrt";
         case UnaryOperation::Neg:        return "Neg";
         case UnaryOperation::Log:        return "Log";
         case UnaryOperation::LogicalNot: return "LogicalNot";
         case UnaryOperation::Sin:        return "Sin";
         default:                         return "Unknown";
     }
 }

References Abs, Ceil, Exp, Log, LogicalNot, Neg, Rsqrt, Sin, and Sqrt.

Referenced by StringifyLayerParameters< ElementwiseUnaryDescriptor >::Serialize().

◆ GetUnpaddedTensorStrides()

TensorShape GetUnpaddedTensorStrides ( const TensorInfo & tensorInfo )

Definition at line 15 of file TensorHandle.cpp.

 {
     TensorShape shape(tensorInfo.GetShape());
     auto size = GetDataTypeSize(tensorInfo.GetDataType());
     auto runningSize = size;
     std::vector<unsigned int> strides(shape.GetNumDimensions());
     auto lastIdx = shape.GetNumDimensions()-1;
     for (unsigned int i=0; i < lastIdx ; i++)
     {
         strides[lastIdx-i] = runningSize;
         runningSize *= shape[lastIdx-i];
     }
     strides[0] = runningSize;
     return TensorShape(shape.GetNumDimensions(), strides.data());
 }

References TensorInfo::GetDataType(), GetDataTypeSize(), TensorShape::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by TosaRefTensorHandle::GetStrides(), SampleTensorHandle::GetStrides(), RefTensorHandle::GetStrides(), ConstTensorHandle::GetStrides(), and RefTensorHandleDecorator::GetStrides().

◆ GetVersion()

const std::string GetVersion ( )

Definition at line 77 of file Utils.cpp.

 {
     return ARMNN_VERSION;
 }

References ARMNN_VERSION.

◆ GpuFsaActivationCreateOp()

void GpuFsaActivationCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const ActivationDescriptor &	descriptor
	)

Definition at line 58 of file GpuFsaActivation.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
  
     aclInput0Info.set_are_values_constant(input.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus{};
     switch (descriptor.m_Function)
     {
         case ActivationFunction::TanH:
         {
             aclStatus = GpuTanh::validate_op(*sketch, inputTensorInfos[0]);
             break;
         }
         case ActivationFunction::Sigmoid:
         {
             aclStatus = GpuSigmoid::validate_op(*sketch, inputTensorInfos[0]);
             break;
         }
         default:
             throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
                                            + GetActivationFunctionAsCString(descriptor.m_Function));
  
     }
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during Activation layer validation");
     }
  
     arm_compute::ITensorInfo* activationOutputInfo{};
     switch (descriptor.m_Function)
     {
         case ActivationFunction::TanH:
         {
             activationOutputInfo = GpuTanh::create_op(*sketch, inputTensorInfos[0]);
             break;
         }
         case ActivationFunction::Sigmoid:
         {
             activationOutputInfo = GpuSigmoid::create_op(*sketch, inputTensorInfos[0]);
             break;
         }
         default:
             throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
                                            + GetActivationFunctionAsCString(descriptor.m_Function));
  
     }
  
     // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, activationOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaActivationValidate()

arm_compute::Status GpuFsaActivationValidate	(	const TensorInfo &	input,
		const ActivationDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaActivation.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
     aclInputInfo.set_are_values_constant(input.IsConstant());
  
     arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
  
     switch (descriptor.m_Function)
     {
         case ActivationFunction::TanH:
         {
             if ( descriptor.m_A != 1 || descriptor.m_B != 1)
             {
                  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                             "Activation function TanH only works with a=1 and b=1");
             }
             return GpuTanh::validate_op(sketch, inputInfo);
         }
         case ActivationFunction::Sigmoid:
         {
             return GpuSigmoid::validate_op(sketch, inputInfo);
         }
         default:
             return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                        std::string("Activation function currently not supported in GpuFsa: ")
                                            + GetActivationFunctionAsCString(descriptor.m_Function));
     }
  
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaBackendId()

constexpr const char* armnn::GpuFsaBackendId ( )

constexpr

Definition at line 10 of file GpuFsaBackendId.hpp.

 { return "GpuFsa"; }

Referenced by GpuFsaBackend::GetIdStatic().

◆ GpuFsaBatchMatMulCreateOp()

void GpuFsaBatchMatMulCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input0,
		const TensorInfo &	input1,
		const BatchMatMulDescriptor &	descriptor
	)

Definition at line 51 of file GpuFsaBatchMatMul.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
     arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
  
     aclInput0Info.set_are_values_constant(input0.IsConstant());
     aclInput1Info.set_are_values_constant(input1.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
  
     MatMulAttributes matMulAttributes{};
     matMulAttributes.adj_lhs(descriptor.m_TransposeX);
     matMulAttributes.adj_rhs(descriptor.m_TransposeY);
     GpuMatMulSettings matmulSettings{};
     matmulSettings.m0(1);
     matmulSettings.n0(1);
     matmulSettings.k0(1);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuMatMul::validate_op(*sketch,
                                                            inputTensorInfos[0],
                                                            inputTensorInfos[1],
                                                            matMulAttributes,
                                                            matmulSettings);
  
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary add validation");
     }
  
     arm_compute::ITensorInfo* addOutputInfo = GpuMatMul::create_op(*sketch,
                                                                    inputTensorInfos[0],
                                                                    inputTensorInfos[1],
                                                                    matMulAttributes,
                                                                    matmulSettings);
  
     // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaBatchMatMulValidate()

arm_compute::Status GpuFsaBatchMatMulValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const BatchMatMulDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaBatchMatMul.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
     arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
  
     aclInput0Info.set_are_values_constant(input0.IsConstant());
     aclInput1Info.set_are_values_constant(input1.IsConstant());
  
     arm_compute::ITensorInfo*  inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
     arm_compute::ITensorInfo*  inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
  
     MatMulAttributes matMulAttributes{};
     matMulAttributes.adj_lhs(descriptor.m_TransposeX);
     matMulAttributes.adj_rhs(descriptor.m_TransposeY);
     GpuMatMulSettings matmulSettings{};
     matmulSettings.m0(1);
     matmulSettings.n0(1);
     matmulSettings.k0(1);
  
     return GpuMatMul::validate_op(sketch, inputInfo0, inputInfo1, matMulAttributes, matmulSettings);
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaCastCreateOp()

void GpuFsaCastCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 61 of file GpuFsaCast.cpp.

 {
     using namespace armcomputetensorutils;
  
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
  
     aclinputInfo.set_are_values_constant(input.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclinputInfo));
  
     CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuCast::validate_op(*sketch, inputTensorInfos[0], cast_attr);
     const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!validated)
     {
         throw BackendCapabilityException("\"" + std::string(GpuFsaBackendId())
                                          + "\" backend failed during cast operator validation");
     }
  
     arm_compute::ITensorInfo* castOutputInfo =
             GpuCast::create_op(*sketch, inputTensorInfos[0], cast_attr);
  
     // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, castOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaCastValidate()

arm_compute::Status GpuFsaCastValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 33 of file GpuFsaCast.cpp.

 {
     using namespace armcomputetensorutils;
  
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
  
     aclinputInfo.set_are_values_constant(input.IsConstant());
  
     arm_compute::ITensorInfo*  inputInfo0 = workloadContext.create_tensor_info(aclinputInfo);
  
     CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
  
     arm_compute::Status aclStatus = GpuCast::validate_op(sketch, inputInfo0, cast_attr);
 #ifndef NDEBUG
     const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!validated)
     {
         std::cout << "GpuFsaCastValidate failed: " << aclStatus.error_description() << std::endl;
     }
 #endif
     return aclStatus;
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaConstantWorkloadValidate()

arm_compute::Status GpuFsaConstantWorkloadValidate ( const TensorInfo & output )

Definition at line 17 of file GpuFsaConstantWorkload.cpp.

 {
     const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     std::array<arm_compute::DataType,8> supportedTypes = {
             arm_compute::DataType::F16,
             arm_compute::DataType::F32,
             arm_compute::DataType::QASYMM8,
             arm_compute::DataType::QASYMM8_SIGNED,
             arm_compute::DataType::QSYMM16,
             arm_compute::DataType::QSYMM8,
             arm_compute::DataType::QSYMM8_PER_CHANNEL,
             arm_compute::DataType::S32
     };
     auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
  
     if (it != end(supportedTypes))
     {
         return arm_compute::Status{};
     }
     else
     {
         return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
     }
 }

◆ GpuFsaConvolution2dCreateOp()

void GpuFsaConvolution2dCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const Convolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 70 of file GpuFsaConvolution2d.cpp.

 {
 /*
  * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
  * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
  * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
  * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
  * using a single sketch.
  * The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
  * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
  * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
  * doesn't know which Tensors to use.
  */
     GpuWorkloadSketch* sketch = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     // Build and create tensor infos using the sketch
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo       aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
  
     // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op / create_op
     arm_compute::TensorInfo aclBiasInfo;
     arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if(!biases.has_value())
         {
             throw InvalidArgumentException("GpuFsaConvolution2d::CreateOp: No biases set when biases are enabled");
         }
         aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
  
         inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
         biasSketchInfoPtr = inputTensorInfos[2];
     }
  
     Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuConv2d::validate_op(*sketch,
                                                            inputTensorInfos[0],
                                                            inputTensorInfos[1],
                                                            biasSketchInfoPtr,
                                                            conv2dAttributes);
  
     const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during Convolution2D operation validation");
     }
  
     // Create the Op within the Sketch using the TensorInfos we have stored
     arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(*sketch,
                                                                  inputTensorInfos[0],
                                                                  inputTensorInfos[1],
                                                                  biasSketchInfoPtr,
                                                                  conv2dAttributes);
  
     // Create the Output
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaConvolution2dValidate()

arm_compute::Status GpuFsaConvolution2dValidate	(	const TensorInfo &	input,
		const Convolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 24 of file GpuFsaConvolution2d.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     // Build and create tensor infos using the sketch
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo       aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     auto inputInfo  = workloadContext.create_tensor_info(aclInputInfo);
     auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
  
     // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
     arm_compute::TensorInfo aclBiasInfo;
     arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if(!biases.has_value())
         {
             throw InvalidArgumentException("GpuFsaConvolution2d::ValidateOp: No biases set when biases are enabled");
         }
         aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
  
         biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
     }
  
     Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch,
                                                            inputInfo,
                                                            weightInfo,
                                                            biasSketchInfoPtr,
                                                            conv2dAttributes);
  
     return aclStatus;
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaDepthwiseConvolution2dCreateOp()

void GpuFsaDepthwiseConvolution2dCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const DepthwiseConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 89 of file GpuFsaDepthwiseConvolution2d.cpp.

 {
 /*
 * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
 * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
 * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
 * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
 * using a single sketch.
 * The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
 * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
 * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
 * doesn't know which Tensors to use.
 */
     GpuWorkloadSketch* sketch = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     // Build and create tensor infos using the sketch
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
  
     // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
     //
     // ACL format for weights for depthwise is:
     // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
     // - [1, C, H, W] for [N, C, H, W] input/output layout
     //
     // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
     // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
     // so we do the permute here for the TensorInfo weights.
     unsigned int aclDepthMultiplier;
     TensorInfo weightsPermuted;
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
     auto weightsShape = weightsPermuted.GetShape();
     weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
  
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
  
     // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
     arm_compute::TensorInfo aclBiasInfo;
     arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if(!biases.has_value())
         {
             throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
         }
         aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
  
         inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
         biasSketchInfoPtr = inputTensorInfos[2];
     }
  
     DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
                                                                     inputTensorInfos[0],
                                                                     inputTensorInfos[1],
                                                                     biasSketchInfoPtr,
                                                                     depthwiseConv2dAttributes);
  
     const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
     if (!supported)
     {
         throw BackendCapabilityException(
             "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
     }
  
     // Create the Op within the Sketch using the TensorInfos we have stored
     arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
                                                                           inputTensorInfos[0],
                                                                           inputTensorInfos[1],
                                                                           biasSketchInfoPtr,
                                                                           depthwiseConv2dAttributes);
  
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaDepthwiseConvolution2dValidate()

arm_compute::Status GpuFsaDepthwiseConvolution2dValidate	(	const TensorInfo &	input,
		const DepthwiseConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 26 of file GpuFsaDepthwiseConvolution2d.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     // Build and create tensor infos using the sketch
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
  
     // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
     //
     // ACL format for weights for depthwise is:
     // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
     // - [1, C, H, W] for [N, C, H, W] input/output layout
     //
     // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
     // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
     // so we do the permute here for the TensorInfo weights.
     unsigned int aclDepthMultiplier;
     TensorInfo weightsPermuted;
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
     auto weightsShape = weightsPermuted.GetShape();
     weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
  
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     auto inputInfo  = workloadContext.create_tensor_info(aclInputInfo);
     auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
  
     // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
     arm_compute::TensorInfo aclBiasInfo;
     arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if(!biases.has_value())
         {
             throw InvalidArgumentException(
                 "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
         }
         aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
  
         biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
     }
  
     DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
                                                                     inputInfo,
                                                                     weightInfo,
                                                                     biasSketchInfoPtr,
                                                                     depthwiseConv2dAttributes);
  
     return aclStatus;
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaElementwiseBinaryCreateOp()

void GpuFsaElementwiseBinaryCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input0,
		const TensorInfo &	input1,
		const ElementwiseBinaryDescriptor &	descriptor
	)

Definition at line 63 of file GpuFsaElementwiseBinary.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
     arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
  
     aclInput0Info.set_are_values_constant(input0.IsConstant());
     aclInput1Info.set_are_values_constant(input1.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
  
     // Validate operator, check status and update reasonIfUnsupported
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus{};
     switch (descriptor.m_Operation)
     {
         case BinaryOperation::Add:
         {
             aclStatus = GpuAdd::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         case BinaryOperation::Mul:
         {
             aclStatus = GpuMul::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         case BinaryOperation::Sub:
         {
             aclStatus = GpuSub::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         default:
             throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
                                            + GetBinaryOperationAsCString(descriptor.m_Operation));
     }
  
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary add validation");
     }
  
     arm_compute::ITensorInfo* elementwiseBinaryOutputInfo{};
     switch (descriptor.m_Operation)
     {
         case BinaryOperation::Add:
         {
             elementwiseBinaryOutputInfo = GpuAdd::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         case BinaryOperation::Mul:
         {
             elementwiseBinaryOutputInfo = GpuMul::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         case BinaryOperation::Sub:
         {
             elementwiseBinaryOutputInfo = GpuSub::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
             break;
         }
         default:
             throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
                                            + GetBinaryOperationAsCString(descriptor.m_Operation));
     }
  
     // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, elementwiseBinaryOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaElementwiseBinaryValidate()

arm_compute::Status GpuFsaElementwiseBinaryValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const ElementwiseBinaryDescriptor &	descriptor
	)

Definition at line 24 of file GpuFsaElementwiseBinary.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
     arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
  
     aclInput0Info.set_are_values_constant(input0.IsConstant());
     aclInput1Info.set_are_values_constant(input1.IsConstant());
  
     arm_compute::ITensorInfo*  inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
     arm_compute::ITensorInfo*  inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
  
     switch (descriptor.m_Operation)
     {
         case BinaryOperation::Add:
         {
             return GpuAdd::validate_op(sketch, inputInfo0, inputInfo1);
         }
         case BinaryOperation::Mul:
         {
             return GpuMul::validate_op(sketch, inputInfo0, inputInfo1);
         }
         case BinaryOperation::Sub:
         {
             return GpuSub::validate_op(sketch, inputInfo0, inputInfo1);
         }
         default:
             return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                        std::string("Elementwise Binary operation not supported in GpuFsa: ")
                                        + GetBinaryOperationAsCString(descriptor.m_Operation));
     }
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaPooling2dCreateOp()

void GpuFsaPooling2dCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const Pooling2dDescriptor &	descriptor
	)

Definition at line 40 of file GpuFsaPooling2d.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     aclInputInfo.set_are_values_constant(input.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
  
     Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
     GpuPool2dSettings pool2dSettings{};
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuPool2d::validate_op(*sketch,
                                                            inputTensorInfos[0],
                                                            pool2dAttributes,
                                                            pool2dSettings);
  
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during pooling 2d validation");
     }
  
     arm_compute::ITensorInfo* addOutputInfo = GpuPool2d::create_op(*sketch,
                                                                    inputTensorInfos[0],
                                                                    pool2dAttributes,
                                                                    pool2dSettings);
  
     // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaPooling2dValidate()

arm_compute::Status GpuFsaPooling2dValidate	(	const TensorInfo &	input,
		const Pooling2dDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaPooling2d.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     aclInputInfo.set_are_values_constant(input.IsConstant());
     arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
  
     Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
     GpuPool2dSettings pool2dSettings{};
  
     return GpuPool2d::validate_op(sketch, inputInfo, pool2dAttributes, pool2dSettings);
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaPreCompiledWorkloadValidate()

bool armnn::GpuFsaPreCompiledWorkloadValidate ( std::string * reasonIfUnsupported )

◆ GpuFsaReshapeCreateOp()

void GpuFsaReshapeCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const ReshapeDescriptor &	descriptor
	)

Definition at line 49 of file GpuFsaReshape.cpp.

 {
     GpuWorkloadSketch*  sketch          = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
  
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos;
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos;
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
  
     aclInputInfo.set_are_values_constant(input.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
  
     ReshapeAttributes attributes;
     attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
  
     arm_compute::ITensorInfo* addOutputInfo = GpuReshape::create_op(*sketch, inputTensorInfos[0], attributes);
  
     // Temporary fix until fusing attempt is made for GpuFsa backend and outputLayer workoad is created
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
  
     // Store the tensorInfos within the blob as std::unique_ptr<> so they can be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaReshapeValidate()

arm_compute::Status GpuFsaReshapeValidate	(	const TensorInfo &	input,
		const ReshapeDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaReshape.cpp.

 {
     auto compileContext  = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext = GpuWorkloadContext(&compileContext);
  
     GpuWorkloadSketch sketch(&workloadContext);
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
     aclInputInfo.set_are_values_constant(input.IsConstant());
  
     arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
  
     ReshapeAttributes attributes;
     attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
  
     arm_compute::Status aclStatus = GpuReshape::validate_op(sketch, inputInfo, attributes);
  
 #ifndef NDEBUG
     if (aclStatus.error_code() != arm_compute::ErrorCode::OK)
     {
         std::cout << "GpuFsaReshapeValidate failed: " << aclStatus.error_description() << std::endl;
     }
 #endif
  
     return aclStatus;
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaResizeCreateOp()

void GpuFsaResizeCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const ResizeDescriptor &	descriptor
	)

Definition at line 39 of file GpuFsaResize.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     aclInputInfo.set_are_values_constant(input.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
  
     ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuResize::validate_op(*sketch,
                                                            inputTensorInfos[0],
                                                            resizeAttributes);
  
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during resize validation");
     }
  
     arm_compute::ITensorInfo* addOutputInfo = GpuResize::create_op(*sketch,
                                                                    inputTensorInfos[0],
                                                                    resizeAttributes);
  
     // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created.
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
     GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaResizeValidate()

arm_compute::Status GpuFsaResizeValidate	(	const TensorInfo &	input,
		const ResizeDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaResize.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx         = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext    = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     aclInputInfo.set_are_values_constant(input.IsConstant());
     arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
  
     ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
  
     return GpuResize::validate_op(sketch, inputInfo, resizeAttributes);
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaSoftmaxCreateOp()

void GpuFsaSoftmaxCreateOp	(	GpuFsaPreCompiledBlob *	blob,
		const TensorInfo &	input,
		const TensorInfo &	output,
		const SoftmaxDescriptor &	descriptor
	)

Definition at line 63 of file GpuFsaSoftmax.cpp.

 {
     GpuWorkloadSketch* sketch           = blob->sketch.get();
     GpuWorkloadContext* workloadContext = blob->workloadContext.get();
     std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
     std::vector<arm_compute::ITensorInfo*> outputTensorInfos  = {};
  
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
     aclInputInfo.set_are_values_constant(input.IsConstant());
     aclOutputInfo.set_are_values_constant(output.IsConstant());
  
     inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
     outputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclOutputInfo));
  
     // Set Softmax attributes using descriptor
     SoftmaxAttributes softmaxAttributes{};
     softmaxAttributes.beta(descriptor.m_Beta); // Only used for LogSoftmax else default
     softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     softmaxAttributes.axis(aclAxis);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuSoftmax::validate_op(*sketch,
                                                             inputTensorInfos[0],
                                                             outputTensorInfos[0],
                                                             softmaxAttributes);
     const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!supported)
     {
         throw BackendCapabilityException("\"GpuFsa\" backend failed during softmax validation");
     }
  
     GpuSoftmax::create_op(*sketch, inputTensorInfos[0], outputTensorInfos[0], softmaxAttributes);
  
     // Store the TensorInfos within the blob as unique_ptrs to be used later
     blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
     blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaSoftmaxValidate()

arm_compute::Status GpuFsaSoftmaxValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SoftmaxDescriptor &	descriptor
	)

Definition at line 22 of file GpuFsaSoftmax.cpp.

 {
     // Create a new workload sketch, for validation purposes
     auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
     auto workloadContext = GpuWorkloadContext(&compileCtx);
     GpuWorkloadSketch sketch{ &workloadContext };
  
     // Build and create tensor infos using the sketch
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
     aclInputInfo.set_are_values_constant(input.IsConstant());
     aclOutputInfo.set_are_values_constant(output.IsConstant());
     arm_compute::ITensorInfo*  inputInfo = workloadContext.create_tensor_info(aclInputInfo);
     arm_compute::ITensorInfo*  outputInfo = workloadContext.create_tensor_info(aclOutputInfo);
  
     // Set Softmax attributes using descriptor
     SoftmaxAttributes softmaxAttributes{};
     softmaxAttributes.beta(descriptor.m_Beta);
     softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     softmaxAttributes.axis(aclAxis);
  
     // Validate operator, check status and update reasonIfUnsupported
     arm_compute::Status aclStatus = GpuSoftmax::validate_op(sketch,
                                                            inputInfo,
                                                            outputInfo,
                                                            softmaxAttributes);
  
 #ifndef NDEBUG
     const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
     if (!validated)
     {
         std::cout << "GpuFsaSoftmaxValidate failed: " << aclStatus.error_description() << std::endl;
     }
 #endif
  
     return aclStatus;
 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaTensorHandleFactoryId()

constexpr const char* armnn::GpuFsaTensorHandleFactoryId ( )

constexpr

Definition at line 14 of file GpuFsaTensorHandleFactory.hpp.

14 { return "Arm/GpuFsa/TensorHandleFactory"; }

Referenced by GpuFsaTensorHandleFactory::GetIdStatic().

◆ HasCapability() [1/4]

bool HasCapability	(	const BackendOptions::BackendOption &	backendOption,
		const armnn::BackendId &	backend
	)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 80 of file BackendHelper.cpp.

 {
     return HasMatchingCapability(backendOption, backend);
 }

References HasMatchingCapability().

◆ HasCapability() [2/4]

bool HasCapability	(	const BackendOptions::BackendOption &	capability,
		const BackendCapabilities &	capabilities
	)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 75 of file BackendHelper.cpp.

 {
     return HasMatchingCapability(capability, capabilities);
 }

References HasMatchingCapability().

◆ HasCapability() [3/4]

bool HasCapability	(	const std::string &	name,
		const armnn::BackendId &	backend
	)

Convenience function to check if a capability exists in a backend.

Definition at line 70 of file BackendHelper.cpp.

 {
     return GetCapability(name, backend).has_value();
 }

References GetCapability().

◆ HasCapability() [4/4]

bool HasCapability	(	const std::string &	name,
		const BackendCapabilities &	capabilities
	)

Convenience function to check if a capability exists in a BackendCapabilities struct.

Definition at line 65 of file BackendHelper.cpp.

 {
     return GetCapability(name, capabilities).has_value();
 }

References GetCapability().

◆ HasMatchingCapability() [1/2]

bool HasMatchingCapability	(	const BackendOptions::BackendOption &	backendOption,
		const armnn::BackendId &	backend
	)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 117 of file BackendHelper.cpp.

 {
     auto const& backendRegistry = armnn::BackendRegistryInstance();
     if (backendRegistry.IsBackendRegistered(backend))
     {
         auto factoryFunc = backendRegistry.GetFactory(backend);
         auto backendObject = factoryFunc();
         auto capabilities = backendObject->GetCapabilities();
         return HasMatchingCapability(backendOption, capabilities);
     }
     return false;
 }

References BackendRegistryInstance(), and HasMatchingCapability().

◆ HasMatchingCapability() [2/2]

bool HasMatchingCapability	(	const BackendOptions::BackendOption &	capability,
		const BackendCapabilities &	capabilities
	)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 85 of file BackendHelper.cpp.

 {
     for (size_t i=0; i < capabilities.GetOptionCount(); i++)
     {
         const auto& backendCapability = capabilities.GetOption(i);
         if (capability.GetName() == backendCapability.GetName())
         {
             if (capability.GetValue().IsBool() && backendCapability.GetValue().IsBool())
             {
                 return capability.GetValue().AsBool() == backendCapability.GetValue().AsBool();
             }
             else if (capability.GetValue().IsFloat() && backendCapability.GetValue().IsFloat())
             {
                 return capability.GetValue().AsFloat() == backendCapability.GetValue().AsFloat();
             }
             else if (capability.GetValue().IsInt() && backendCapability.GetValue().IsInt())
             {
                 return capability.GetValue().AsInt() == backendCapability.GetValue().AsInt();
             }
             else if (capability.GetValue().IsString() && backendCapability.GetValue().IsString())
             {
                 return capability.GetValue().AsString() == backendCapability.GetValue().AsString();
             }
             else if (capability.GetValue().IsUnsignedInt() && backendCapability.GetValue().IsUnsignedInt())
             {
                 return capability.GetValue().AsUnsignedInt() == backendCapability.GetValue().AsUnsignedInt();
             }
         }
     }
     return false;
 }

References BackendOptions::Var::AsBool(), BackendOptions::Var::AsFloat(), BackendOptions::Var::AsInt(), BackendOptions::Var::AsString(), BackendOptions::Var::AsUnsignedInt(), BackendOptions::BackendOption::GetName(), BackendOptions::GetOption(), BackendOptions::GetOptionCount(), BackendOptions::BackendOption::GetValue(), BackendOptions::Var::IsBool(), BackendOptions::Var::IsFloat(), BackendOptions::Var::IsInt(), BackendOptions::Var::IsString(), and BackendOptions::Var::IsUnsignedInt().

Referenced by ArmnnDevice::ArmnnDevice(), CheckFp16Support(), HasCapability(), HasMatchingCapability(), LoadedNetwork::ImportInputs(), LoadedNetwork::ImportOutputs(), and RuntimeImpl::RuntimeImpl().

◆ IgnoreUnused()

void armnn::IgnoreUnused ( Ts && ... )

inline

Definition at line 14 of file IgnoreUnused.hpp.

14 {}

◆ IndexToCoordinates()

std::vector<uint32_t> armnn::IndexToCoordinates	(	std::vector< uint32_t > &	shape,
		uint32_t	index
	)

Definition at line 16 of file Tile.cpp.

 {
     std::vector<uint32_t> coordinates;
     // Iterating through dimensions starting from the last dimension to the first
     for (std::size_t i = shape.size() - 1; i < shape.size(); --i)
     {
         // Coordinate is found by getting the index and modulus it by the current dimension size
         // shape of dimension = dimension size
         coordinates.insert(coordinates.begin(), index % shape[i]);
         // Pass the index to next iteration making index = index / size of the current dimension
         index = index/shape[i];
     }
     return coordinates;
 }

Referenced by Tile().

◆ InitializeArmComputeClTensorData()

void InitializeArmComputeClTensorData	(	arm_compute::CLTensor &	clTensor,
		const ConstTensorHandle *	handle
	)

inline

Definition at line 124 of file ClWorkloadUtils.hpp.

 {
     ARMNN_ASSERT(handle);
  
     armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
     switch(handle->GetTensorInfo().GetDataType())
     {
         case DataType::Float16:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
             break;
         case DataType::Float32:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
             break;
         case DataType::QAsymmU8:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
             break;
         case DataType::QAsymmS8:
         case DataType::QSymmS8:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
             break;
         case DataType::QSymmS16:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
             break;
         case DataType::Signed32:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
             break;
         case DataType::BFloat16:
             CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::BFloat16>());
             break;
         default:
             // Throw exception; assertion not called in release build.
             throw Exception("Unexpected tensor type during InitializeArmComputeClTensorData().");
     }
 };

References ARMNN_ASSERT.

◆ InitializeArmComputeTensorData() [1/2]

void armnn::InitializeArmComputeTensorData	(	arm_compute::Tensor &	tensor,
		const ConstTensorHandle *	handle
	)

inline

Definition at line 104 of file NeonWorkloadUtils.hpp.

 {
     ARMNN_ASSERT(handle);
  
     switch(handle->GetTensorInfo().GetDataType())
     {
         case DataType::Float16:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::Half>());
             break;
         case DataType::Float32:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<float>());
             break;
         case DataType::QAsymmU8:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<uint8_t>());
             break;
         case DataType::QSymmS8:
         case DataType::QAsymmS8:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<int8_t>());
             break;
         case DataType::Signed32:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<int32_t>());
             break;
         case DataType::QSymmS16:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<int16_t>());
             break;
         case DataType::BFloat16:
             CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::BFloat16>());
             break;
         default:
             // Throw exception; assertion not called in release build.
             throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
     }
 };

References ARMNN_ASSERT, BFloat16, CopyArmComputeTensorData(), Float16, Float32, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), ConstTensorHandle::GetTensorInfo(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ InitializeArmComputeTensorData() [2/2]

void armnn::InitializeArmComputeTensorData	(	arm_compute::Tensor &	tensor,
		TensorInfo	tensorInfo,
		const ITensorHandle *	handle
	)

inline

Definition at line 68 of file NeonWorkloadUtils.hpp.

 {
     ARMNN_ASSERT(handle);
  
     switch(tensorInfo.GetDataType())
     {
         case DataType::Float16:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::Half*>(handle->Map()));
             break;
         case DataType::Float32:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const float*>(handle->Map()));
             break;
         case DataType::QAsymmU8:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const uint8_t*>(handle->Map()));
             break;
         case DataType::QSymmS8:
         case DataType::QAsymmS8:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const int8_t*>(handle->Map()));
             break;
         case DataType::Signed32:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const int32_t*>(handle->Map()));
             break;
         case DataType::QSymmS16:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const int16_t*>(handle->Map()));
             break;
         case DataType::BFloat16:
             CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::BFloat16*>(handle->Map()));
             break;
         default:
             // Throw exception; assertion not called in release build.
             throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
     }
 };

References ARMNN_ASSERT, BFloat16, CopyArmComputeTensorData(), Float16, Float32, TensorInfo::GetDataType(), ITensorHandle::Map(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

Referenced by NeonFullyConnectedWorkload::Execute(), and NeonConvolution2dWorkload::Execute().

◆ InsertConvertFp16ToFp32LayersBefore()

std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore	(	Graph &	graph,
		Layer &	layer,
		bool	expectCorrectInputType
	)

Definition at line 40 of file NetworkUtils.cpp.

 {
     std::vector<ConvertFp16ToFp32Layer*> convertLayers;
     convertLayers.reserve(layer.GetNumInputSlots());
  
     // Insert a ConvertFp16ToFp32Layer before each input slot
     for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
     {
         bool allowInsert = true;
         if (expectCorrectInputType)
         {
             // Only insert ConvertFp16ToFp32Layer before FP16 input slots
             OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
             allowInsert =
                 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
         }
  
         if (allowInsert)
         {
             const std::string name =
                 std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                 layer.GetName();
             ConvertFp16ToFp32Layer* convertLayer =
                 graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
  
             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
             convertInfo.SetDataType(DataType::Float32);
  
             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
  
             convertLayers.emplace_back(convertLayer);
         }
     }
  
     return convertLayers;
 }

References Layer::BeginInputSlots(), Layer::EndInputSlots(), Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().

◆ InsertConvertFp32ToFp16LayersAfter()

std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter	(	Graph &	graph,
		Layer &	layer
	)

Definition at line 79 of file NetworkUtils.cpp.

 {
     const unsigned int numOutputSlots = layer.GetNumOutputSlots();
  
     std::vector<ConvertFp32ToFp16Layer*> convertLayers;
     convertLayers.reserve(numOutputSlots);
  
     // Update FP16 output slots to FP32 on current layer
     ChangeOutputFp16ToFp32(layer);
  
     // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
     for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
     {
         OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
         if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
         {
             const std::string name =
                 std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
             ConvertFp32ToFp16Layer* convertLayer =
                 graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
  
             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
             convertInfo.SetDataType(DataType::Float16);
  
             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
  
             convertLayers.emplace_back(convertLayer);
         }
     }
  
     return convertLayers;
 }

References Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().

◆ InsertDebugLayerAfter()

std::vector< DebugLayer * > InsertDebugLayerAfter	(	Graph &	graph,
		Layer &	layer,
		bool	toFile
	)

Definition at line 112 of file NetworkUtils.cpp.

 {
     std::vector<DebugLayer*> debugLayers;
     debugLayers.reserve(layer.GetNumOutputSlots());
  
     // Connect a DebugLayer to each output slot of the layer
     uint32_t outputSlotIdx = 0;
     for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
     {
         const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
             std::to_string(outputSlotIdx);
  
         DebugLayer* debugLayer =
             graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);
  
         // Sets output tensor info for the debug layer.
         ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
         TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
  
         debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
  
         // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
         debugLayer->SetBackendId(Compute::CpuRef);
  
         debugLayers.emplace_back(debugLayer);
  
         ++outputSlotIdx;
     }
  
     return debugLayers;
 }

References ARMNN_ASSERT, Layer::BeginOutputSlots(), CpuRef, Layer::EndOutputSlots(), InputSlot::GetConnectedOutputSlot(), Layer::GetInputSlot(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AddDebugImpl::Run(), and AddDebugToFileImpl::Run().

◆ InstanceNorm()

void InstanceNorm	(	const InstanceNormalizationQueueDescriptor &	data,
		const TensorInfo &	inputInfo,
		Decoder< float > &	inputDecoder,
		Encoder< float > &	outputEncoder
	)

Definition at line 18 of file InstanceNorm.cpp.

 {
     // Instance normalisation: for every (batch, channel) pair the values over height x width are
     // normalised with their own mean and variance, then scaled by gamma and shifted by beta.
     const TensorShape inputShape = inputInfo.GetShape();
  
     // Resolves the height/width/channel positions for the configured data layout
     armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
  
     // Dimension 0 is read as the batch count; the rest are looked up through the data layout
     unsigned int inputBatches  = inputShape[0];
     unsigned int inputHeight   = inputShape[dataLayout.GetHeightIndex()];
     unsigned int inputWidth    = inputShape[dataLayout.GetWidthIndex()];
     unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
  
     float beta  = data.m_Parameters.m_Beta;
     float eps   = data.m_Parameters.m_Eps;
     float gamma = data.m_Parameters.m_Gamma;
  
     for (unsigned int n = 0; n < inputBatches; ++n)
     {
         for (unsigned int c = 0; c < inputChannels; ++c)
         {
             float mean = 0, var = 0;
  
             //Calculate Mean
             for (unsigned int h = 0; h < inputHeight; h++)
             {
                 for (unsigned int w = 0; w < inputWidth; w++)
                 {
                     unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
  
                     // Presumably positions the decoder on element `index` so Get() reads it - TODO confirm
                     inputDecoder[index];
                     float value = inputDecoder.Get();
                     mean += value;
                 }
             }
             mean /= static_cast<float>(inputHeight * inputWidth);
  
             //Calculate Variance
             // Sum of squared deviations divided by the element count (H * W)
             for (unsigned int h = 0; h < inputHeight; h++)
             {
                 for (unsigned int w = 0; w < inputWidth; w++)
                 {
                     unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
  
                     inputDecoder[index];
                     float value = inputDecoder.Get();
                     var += (value - mean) * (value - mean);
                 }
             }
             var /= static_cast<float>(inputHeight * inputWidth);
  
             // Apply Instance Normalisation
             // out = (in - mean) * gamma / sqrt(var + eps) + beta, written at the same index as the input
             for (unsigned int h = 0; h < inputHeight; ++h)
             {
                 for (unsigned int w = 0; w < inputWidth; ++w)
                 {
                     unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
                     inputDecoder[index];
                     outputEncoder[index];
                     outputEncoder.Set((inputDecoder.Get() - mean) * gamma /  std::sqrt ( var + eps) + beta);
                 }
  
             }
         }
     }
 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), InstanceNormalizationDescriptor::m_Beta, InstanceNormalizationDescriptor::m_DataLayout, InstanceNormalizationDescriptor::m_Eps, InstanceNormalizationDescriptor::m_Gamma, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

◆ IntersectionOverUnion()

float IntersectionOverUnion	(	const float *	boxI,
		const float *	boxJ
	)

Definition at line 31 of file DetectionPostProcess.cpp.

 {
     // Box-corner format: ymin, xmin, ymax, xmax.
     const int yMin = 0;
     const int xMin = 1;
     const int yMax = 2;
     const int xMax = 3;
     float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
     float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
     float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
     float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
     float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
     float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
     float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
                                 std::max(xMaxIntersection - xMinIntersection, 0.0f);
     float areaUnion = areaI + areaJ - areaIntersection;
     return areaIntersection / areaUnion;
 }

Referenced by NonMaxSuppression().

◆ IsBFloat16()

bool armnn::IsBFloat16 ( const WorkloadInfo & info )

Definition at line 56 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::BFloat16>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsDataType()

bool IsDataType ( const WorkloadInfo & info )

Definition at line 32 of file GpuFsaWorkloadFactory.cpp.

 {
     auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
     auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
     if (it != std::end(info.m_InputTensorInfos))
     {
         return true;
     }
     it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
     if (it != std::end(info.m_OutputTensorInfos))
     {
         return true;
     }
     return false;
 }

References info.

◆ IsFloat16()

bool armnn::IsFloat16 ( const WorkloadInfo & info )

Definition at line 60 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::Float16>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsGpuFsaBackendSupported()

bool armnn::IsGpuFsaBackendSupported	(	Optional< std::string & >	reasonIfUnsupported,
		Args...	args
	)

Definition at line 31 of file GpuFsaLayerSupport.cpp.

 {
     IgnoreUnused(reasonIfUnsupported, (args)...);
 #if defined(ARMCOMPUTEGPUFSA_ENABLED)
     return true;
 #else
     if (reasonIfUnsupported)
     {
         reasonIfUnsupported.value() = "The armnn library has been built without CL support";
     }
     return false;
 #endif
 }

References IgnoreUnused(), and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ IsLayerOptimizable() [1/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer & layer )

Definition at line 99 of file MockBackend.cpp.

 {
     return IsLayerOptimizable(&layer);
 }

◆ IsLayerOptimizable() [2/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer * layer )

Definition at line 88 of file MockBackend.cpp.

 {
     ARMNN_ASSERT(layer != nullptr);
  
     // A Layer is not optimizable if its name contains "unoptimizable"
     const std::string layerName(layer->GetName());
     bool optimizable = layerName.find("unoptimizable") == std::string::npos;
  
     return optimizable;
 }

References ARMNN_ASSERT, and Layer::GetName().

◆ IsLayerSequence()

bool armnn::IsLayerSequence	(	Layer &	currentLayer,
		TYPE	first,
		TYPE	second,
		TYPE	third,
		Layer *	layerList[4],
		bool	handleValidActivates,
		const std::vector< ActivationFunction > &	validActivates
	)

Definition at line 375 of file SubgraphUtils.hpp.

 {
     auto PreviousLayer = [](Layer& layer)
     {
         return &layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
     };
  
     auto NextLayer = [](Layer& layer)
     {
         return &layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer();
     };
  
     auto LayerIncomingConnectionDataType = [](Layer& layer)
     {
         return layer.GetInputSlot(0).GetTensorInfo().GetDataType();
     };
  
     bool result = false;
  
     // Match in reverse so there is only 1 connection to check
     if (IsSequenceLayerType(currentLayer, third))
     {
         // Save DataType of third layer
         DataType dataType = LayerIncomingConnectionDataType(currentLayer);
  
         // Save third layer
         layerList[2] = &currentLayer;
  
         // Check the layers that proceed this one for the requested grouping
         Layer *prevLayer = PreviousLayer(currentLayer);
         if (prevLayer && IsSequenceLayerType(*prevLayer, second))
         {
             bool dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
             if (! dataTypesMatch)
             {
                 return result;
             }
  
             layerList[1] = prevLayer;
             prevLayer = PreviousLayer(*prevLayer);
             if (prevLayer && IsSequenceLayerType(*prevLayer, first))
             {
                 dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
                 if (! dataTypesMatch)
                 {
                     return result;
                 }
  
                 layerList[0] = prevLayer;
  
                 // Detected the first 3 layers if we get to this point so now
                 // check to see if we have a valid activation. If there is no activation
                 // then the sequence still matches.
                 if (handleValidActivates)
                 {
                     Layer *nextLayer = NextLayer(currentLayer);
                     if (nextLayer)
                     {
                         if (IsSequenceLayerType(*nextLayer, LayerType::Activation))
                         {
                             // This layer is an activation, so it must be a valid type for the sequence
                             ActivationFunction activationFunction =
                                     PolymorphicDowncast<ActivationLayer*>(nextLayer)->GetParameters().m_Function;
                             long count = std::count(validActivates.cbegin(),
                                                     validActivates.cend(),
                                                     activationFunction);
                             if (count > 0)
                             {
                                 layerList[3] = nextLayer;
                                 result = true;
                             }
                         }
                         else
                         {
                             // Next layer is not an activation so sequence still matches
                             result = true;
                         }
                     }
                 }
                 else
                 {
                     result = true;
                 }
             }
         }
     }
  
     return result;
 }

◆ IsLayerSupported() [1/2]

bool armnn::IsLayerSupported ( const armnn::Layer & layer )

Definition at line 83 of file MockBackend.cpp.

 {
     return IsLayerSupported(&layer);
 }

◆ IsLayerSupported() [2/2]

bool armnn::IsLayerSupported ( const armnn::Layer * layer )

Definition at line 62 of file MockBackend.cpp.

 {
     ARMNN_ASSERT(layer != nullptr);
  
     armnn::LayerType layerType = layer->GetType();
     switch (layerType)
     {
         case armnn::LayerType::Input:
         case armnn::LayerType::Output:
         case armnn::LayerType::Constant:
         case armnn::LayerType::Addition:
         case armnn::LayerType::Convolution2d:
         case armnn::LayerType::ElementwiseBinary:
             // Layer supported
             return true;
         default:
             // Layer unsupported
             return false;
     }
 }

References Addition, ARMNN_ASSERT, Constant, Convolution2d, ElementwiseBinary, Layer::GetType(), Input, and Output.

Referenced by SampleDynamicWorkloadFactory::IsLayerSupported().

◆ IsLayerTypeSupported()

bool armnn::IsLayerTypeSupported	(	const LayerType &	type,
		const std::vector< TensorInfo > &	infos,
		const BaseDescriptor &	descriptor,
		const Optional< LstmInputParamsInfo > &	lstmParamsInfo,
		const Optional< QuantizedLstmInputParamsInfo > &	quantizedLstmParamsInfo,
		Optional< std::string & >	reasonIfUnsupported,
		const NeonLayerSupport &	support
	)

Definition at line 172 of file NeonLayerSupport.cpp.

 {
     switch (type)
     {
         case LayerType::Activation:
             return support.IsActivationSupported(infos[0],
                                                  infos[1],
                                                  *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)),
                                                  reasonIfUnsupported);
         case LayerType::Addition:
             return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::ArgMinMax:
             return support.IsArgMinMaxSupported(infos[0],
                                                 infos[1],
                                                 *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
                                                 reasonIfUnsupported);
         case LayerType::BatchMatMul:
             return support.IsBatchMatMulSupported(infos[0],
                                                   infos[1],
                                                   infos[2],
                                                   *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
                                                   reasonIfUnsupported);
         case LayerType::BatchNormalization:
             return support.IsBatchNormalizationSupported(infos[0],
                                                          infos[1],
                                                          infos[2],
                                                          infos[3],
                                                          infos[4],
                                                          infos[5],
                                                          *(PolymorphicDowncast<const
                                                              BatchNormalizationDescriptor*>(&descriptor)),
                                                          reasonIfUnsupported);
         case LayerType::BatchToSpaceNd:
             return support.IsBatchToSpaceNdSupported(infos[0],
                                                      infos[1],
                                                      *(PolymorphicDowncast<const
                                                         BatchToSpaceNdDescriptor*>(&descriptor)),
                                                      reasonIfUnsupported);
         case LayerType::Cast:
             return support.IsCastSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::ChannelShuffle:
             return support.IsChannelShuffleSupported(infos[0],
                                                      infos[1],
                                                      *(PolymorphicDowncast<const
                                                          ChannelShuffleDescriptor*>(&descriptor)),
                                                      reasonIfUnsupported);
         case LayerType::Comparison:
             return support.IsComparisonSupported(infos[0],
                                                  infos[1],
                                                  infos[2],
                                                  *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)),
                                                  reasonIfUnsupported);
         case LayerType::Concat:
         {
             std::vector<const TensorInfo*> inputInfos;
             for (uint32_t i = 0; i < (infos.size() - 1); i++)
             {
                 inputInfos.push_back(&infos[i]);
             }
             return support.IsConcatSupported(inputInfos,
                                              infos[infos.size() - 1],
                                              *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)),
                                              reasonIfUnsupported);
         }
         case LayerType::Constant:
             return support.IsConstantSupported(infos[0], reasonIfUnsupported);
         case LayerType::ConvertFp16ToFp32:
             return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::ConvertFp32ToFp16:
             return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::Convolution2d:
         {
             if (infos.size() != 4)
             {
                 throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
                                                "TensorInfos should be of format: {input, output, weights, biases}.");
             }
  
             auto desc = *(PolymorphicDowncast<const Convolution2dDescriptor*>(&descriptor));
             if (infos[3] == TensorInfo())
             {
                 return support.IsConvolution2dSupported(infos[0],
                                                         infos[1],
                                                         desc,
                                                         infos[2],
                                                         EmptyOptional(),
                                                         reasonIfUnsupported);
             }
             else
             {
                 return support.IsConvolution2dSupported(infos[0],
                                                         infos[1],
                                                         desc,
                                                         infos[2],
                                                         infos[3],
                                                         reasonIfUnsupported);
             }
         }
         case LayerType::Convolution3d:
         {
             if (infos.size() != 4)
             {
                 throw InvalidArgumentException("Invalid number of Convolution3d TensorInfos. "
                                                "TensorInfos should be of format: {input, output, weights, biases}.");
             }
  
             auto desc = *(PolymorphicDowncast<const Convolution3dDescriptor*>(&descriptor));
             if (infos[3] == TensorInfo())
             {
                 return support.IsConvolution3dSupported(infos[0],
                                                         infos[1],
                                                         desc,
                                                         infos[2],
                                                         EmptyOptional(),
                                                         reasonIfUnsupported);
             }
             else
             {
                 return support.IsConvolution3dSupported(infos[0],
                                                         infos[1],
                                                         desc,
                                                         infos[2],
                                                         infos[3],
                                                         reasonIfUnsupported);
             }
         }
         case LayerType::DepthToSpace:
             return support.IsDepthToSpaceSupported(infos[0],
                                                    infos[1],
                                                    *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)),
                                                    reasonIfUnsupported);
         case LayerType::DepthwiseConvolution2d:
         {
             if (infos.size() != 4)
             {
                 throw InvalidArgumentException("Invalid number of DepthwiseConvolution2d TensorInfos. "
                                                "TensorInfos should be of format: {input, output, weights, biases}.");
             }
  
             auto desc = *(PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor));
             if (infos[3] == TensorInfo())
             {
                 return support.IsDepthwiseConvolutionSupported(infos[0],
                                                                infos[1],
                                                                desc,
                                                                infos[2],
                                                                EmptyOptional(),
                                                                reasonIfUnsupported);
             }
             else
             {
                 return support.IsDepthwiseConvolutionSupported(infos[0],
                                                                infos[1],
                                                                desc,
                                                                infos[2],
                                                                infos[3],
                                                                reasonIfUnsupported);
             }
         }
         case LayerType::Dequantize:
             return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::DetectionPostProcess:
         {
             auto desc = *(PolymorphicDowncast<const DetectionPostProcessDescriptor*>(&descriptor));
             return support.IsDetectionPostProcessSupported(infos[0],
                                                            infos[1],
                                                            infos[2],
                                                            infos[3],
                                                            infos[4],
                                                            infos[5],
                                                            infos[6],
                                                            desc,
                                                            reasonIfUnsupported);
         }
         case LayerType::Division:
             return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::ElementwiseBinary:
         {
             auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
  
             switch (desc.m_Operation)
             {
                 case BinaryOperation::Add:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2],
                                                    nullptr);
                 case BinaryOperation::Div:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2],
                                                    nullptr);
                 case BinaryOperation::Maximum:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2]);
                 case BinaryOperation::Minimum:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2]);
                 case BinaryOperation::Mul:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2],
                                                    nullptr);
                 case BinaryOperation::Power:
                 case BinaryOperation::SqDiff:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonElementwiseBinaryWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2],
                                                    desc,
                                                    nullptr);
                 case BinaryOperation::Sub:
                     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
                                                    reasonIfUnsupported,
                                                    infos[0],
                                                    infos[1],
                                                    infos[2],
                                                    nullptr);
                 default:
                     return false;
             }
         }
         case LayerType::ElementwiseUnary:
             return support.IsElementwiseUnarySupported(infos[0],
                                                        infos[1],
                                                        *(PolymorphicDowncast<const
                                                            ElementwiseUnaryDescriptor*>(&descriptor)),
                                                        reasonIfUnsupported);
         case LayerType::Fill:
             return support.IsFillSupported(infos[0],
                                            infos[1],
                                            *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)),
                                            reasonIfUnsupported);
         case LayerType::Floor:
             return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::FullyConnected:
             return support.IsFullyConnectedSupported(infos[0],
                                                      infos[1],
                                                      infos[2],
                                                      infos[3],
                                                      *(PolymorphicDowncast<const
                                                          FullyConnectedDescriptor*>(&descriptor)),
                                                      reasonIfUnsupported);
         case LayerType::Fused:
         {
             auto fusedDescriptor = *(PolymorphicDowncast<const FusedDescriptor*>(&descriptor));
             if (fusedDescriptor.m_NumInputSlots + fusedDescriptor.m_NumOutputSlots != infos.size())
             {
                 throw InvalidArgumentException("Invalid number of FusedLayer TensorInfos.");
             }
  
             auto it = infos.begin() + numeric_cast<TensorInfo::DifferenceType>(fusedDescriptor.m_NumInputSlots);
             std::vector<TensorInfo> inputInfos(infos.begin(), it);
             std::vector<TensorInfo> outputInfos(it, infos.end());
  
             return support.IsFusedSupported({inputInfos.begin(), inputInfos.end()},
                                             {outputInfos.begin(), outputInfos.end()},
                                             fusedDescriptor,
                                             reasonIfUnsupported);
         }
         case LayerType::Gather:
             return support.IsGatherSupported(infos[0],
                                              infos[1],
                                              infos[2],
                                              *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)),
                                              reasonIfUnsupported);
         case LayerType::GatherNd:
             return support.IsGatherNdSupported(infos[0],
                                                infos[1],
                                                infos[2],
                                                reasonIfUnsupported);
         case LayerType::Input:
             return support.IsInputSupported(infos[0], reasonIfUnsupported);
         case LayerType::InstanceNormalization:
             return support.IsInstanceNormalizationSupported(infos[0],
                                                             infos[1],
                                                             *(PolymorphicDowncast<const
                                                                 InstanceNormalizationDescriptor*>(&descriptor)),
                                                             reasonIfUnsupported);
         case LayerType::L2Normalization:
             return support.IsL2NormalizationSupported(infos[0],
                                                       infos[1],
                                                       *(PolymorphicDowncast<const
                                                           L2NormalizationDescriptor*>(&descriptor)),
                                                       reasonIfUnsupported);
         case LayerType::LogicalBinary:
             return support.IsLogicalBinarySupported(infos[0],
                                                     infos[1],
                                                     infos[2],
                                                     *(PolymorphicDowncast<const
                                                         LogicalBinaryDescriptor*>(&descriptor)),
                                                     reasonIfUnsupported);
         case LayerType::LogSoftmax:
             return support.IsLogSoftmaxSupported(infos[0],
                                                  infos[1],
                                                  *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)),
                                                  reasonIfUnsupported);
         case LayerType::Lstm:
             return support.IsLstmSupported(infos[0],
                                            infos[1],
                                            infos[2],
                                            infos[3],
                                            infos[4],
                                            infos[5],
                                            infos[6],
                                            *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)),
                                            lstmParamsInfo.value(),
                                            reasonIfUnsupported);
         case LayerType::Map:
             return true;
         case LayerType::Maximum:
             return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::Mean:
             return support.IsMeanSupported(infos[0],
                                            infos[1],
                                            *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)),
                                            reasonIfUnsupported);
         case LayerType::MemCopy:
             return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::MemImport:
             return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::Merge:
             return support.IsMergeSupported(infos[0],
                                                       infos[1],
                                                       infos[2],
                                                       reasonIfUnsupported);
         case LayerType::Minimum:
             return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::Multiplication:
             return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::Normalization:
             return support.IsNormalizationSupported(infos[0],
                                                     infos[1],
                                                     *(PolymorphicDowncast<const
                                                         NormalizationDescriptor*>(&descriptor)),
                                                     reasonIfUnsupported);
         case LayerType::Output:
             return support.IsOutputSupported(infos[0], reasonIfUnsupported);
         case LayerType::Pad:
             return support.IsPadSupported(infos[0],
                                           infos[1],
                                           *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)),
                                           reasonIfUnsupported);
         case LayerType::Permute:
             return support.IsPermuteSupported(infos[0],
                                               infos[1],
                                               *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)),
                                               reasonIfUnsupported);
         case LayerType::Pooling2d:
             return support.IsPooling2dSupported(infos[0],
                                                 infos[1],
                                                 *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
                                                 reasonIfUnsupported);
         case LayerType::Pooling3d:
             return support.IsPooling3dSupported(infos[0],
                                                 infos[1],
                                                 *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
                                                 reasonIfUnsupported);
         case LayerType::Prelu:
             return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::QLstm:
             return support.IsQLstmSupported(infos[0],
                                             infos[1],
                                             infos[2],
                                             infos[3],
                                             infos[4],
                                             infos[5],
                                             *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)),
                                             lstmParamsInfo.value(),
                                             reasonIfUnsupported);
         case LayerType::Quantize:
             return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::QuantizedLstm:
             return support.IsQuantizedLstmSupported(infos[0],
                                                     infos[1],
                                                     infos[2],
                                                     infos[3],
                                                     infos[4],
                                                     quantizedLstmParamsInfo.value(),
                                                     reasonIfUnsupported);
         case LayerType::Rank:
             return true;
         case LayerType::Reshape:
             return support.IsReshapeSupported(infos[0],
                                               infos[1],
                                               *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)),
                                               reasonIfUnsupported);
         case LayerType::Resize:
             return support.IsResizeSupported(infos[0],
                                              infos[1],
                                              *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
                                              reasonIfUnsupported);
         case LayerType::Reduce:
             return support.IsReduceSupported(infos[0],
                                              infos[1],
                                              *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
                                              reasonIfUnsupported);
         case LayerType::ReverseV2:
             return support.IsReverseV2Supported(infos[0],
                                                 infos[1],
                                                 infos[2],
                                                 reasonIfUnsupported);
         case LayerType::Shape:
             return support.IsShapeSupported(infos[0],
                                             infos[1],
                                             reasonIfUnsupported);
         case LayerType::Slice:
             return support.IsSliceSupported(infos[0],
                                             infos[1],
                                             *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)),
                                             reasonIfUnsupported);
         case LayerType::Softmax:
             return support.IsSoftmaxSupported(infos[0],
                                               infos[1],
                                               *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)),
                                               reasonIfUnsupported);
         case LayerType::SpaceToBatchNd:
             return support.IsSpaceToBatchNdSupported(infos[0],
                                                      infos[1],
                                                      *(PolymorphicDowncast<const
                                                         SpaceToBatchNdDescriptor*>(&descriptor)),
                                                      reasonIfUnsupported);
         case LayerType::SpaceToDepth:
             return support.IsSpaceToDepthSupported(infos[0],
                                                    infos[1],
                                                    *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)),
                                                    reasonIfUnsupported);
         case LayerType::Splitter:
         {
             std::vector<TensorInfo> outputInfos;
             for (uint32_t i = 1; i < infos.size(); i++)
             {
                 outputInfos.push_back(infos[i]);
             }
             return support.IsSplitterSupported(infos[0],
                                                {outputInfos.begin(), outputInfos.end()},
                                                *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)),
                                                reasonIfUnsupported);
         }
         case LayerType::Stack:
         {
             std::vector<const TensorInfo*> inputInfos;
             for (uint32_t i = 0; i < infos.size() - 1; i++)
             {
                 inputInfos.push_back(&infos[i]);
             }
             return support.IsStackSupported(inputInfos,
                                             infos[infos.size() - 1],
                                             *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)),
                                             reasonIfUnsupported);
         }
         case LayerType::StridedSlice:
             return support.IsStridedSliceSupported(infos[0],
                                                    infos[1],
                                                    *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)),
                                                    reasonIfUnsupported);
         case LayerType::Subtraction:
             return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::Tile:
             return support.IsTileSupported(infos[0],
                                            infos[1],
                                            *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)),
                                            reasonIfUnsupported);
         case LayerType::Transpose:
             return support.IsTransposeSupported(infos[0],
                                                 infos[1],
                                                 *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)),
                                                 reasonIfUnsupported);
         case LayerType::TransposeConvolution2d:
         {
             if (infos.size() != 4)
             {
                 throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
                                                "TensorInfos should be of format: {input, output, weights, biases}.");
             }
  
             auto desc = *(PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor));
             if (infos[3] == TensorInfo())
             {
                 return support.IsTransposeConvolution2dSupported(infos[0],
                                                                  infos[1],
                                                                  desc,
                                                                  infos[2],
                                                                  EmptyOptional(),
                                                                  reasonIfUnsupported);
             }
             else
             {
                 return support.IsTransposeConvolution2dSupported(infos[0],
                                                                  infos[1],
                                                                  desc,
                                                                  infos[2],
                                                                  infos[3],
                                                                  reasonIfUnsupported);
             }
         }
         case LayerType::UnidirectionalSequenceLstm:
         {
             auto desc = *(PolymorphicDowncast<const UnidirectionalSequenceLstmDescriptor*>(&descriptor));
             return support.IsUnidirectionalSequenceLstmSupported(infos[0],
                                                                  infos[1],
                                                                  infos[2],
                                                                  infos[3],
                                                                  infos[4],
                                                                  infos[5],
                                                                  desc,
                                                                  lstmParamsInfo.value(),
                                                                  reasonIfUnsupported);
         }
         case LayerType::Unmap:
             return true;
         default:
             // layers not supported in neon by default:
             // debug, fakequantization, precompiled,
             // standin, switch
             return false;
     }
 }

Referenced by NeonLayerSupport::IsLayerSupported().

◆ IsNCHW()

bool armnn::IsNCHW ( armnn::Layer & layer )

inline

Definition at line 213 of file SubgraphUtils.hpp.

 {
     CheckForNCHW check;
     layer.ExecuteStrategy(check);
     return check.Result();
 }

References Layer::ExecuteStrategy().

Referenced by ConnectedToLayerWithNCHW().

◆ IsOperationQueueDescriptor() [1/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const ConstantQueueDescriptor & )

constexpr

Definition at line 22 of file RefWorkloadFactory.hpp.

22 { return false; }

◆ IsOperationQueueDescriptor() [2/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const MemCopyQueueDescriptor & )

constexpr

Definition at line 20 of file RefWorkloadFactory.hpp.

20 { return false; }

◆ IsOperationQueueDescriptor() [3/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const PermuteQueueDescriptor & )

constexpr

Definition at line 24 of file RefWorkloadFactory.hpp.

24 { return false; }

◆ IsOperationQueueDescriptor() [4/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const QueueDescriptorType & )

constexpr

Definition at line 18 of file RefWorkloadFactory.hpp.

18 { return true; }

◆ IsQAsymmS8()

bool armnn::IsQAsymmS8 ( const WorkloadInfo & info )

Definition at line 72 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::QAsymmS8>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQAsymmU8()

bool armnn::IsQAsymmU8 ( const WorkloadInfo & info )

Definition at line 76 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::QAsymmU8>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS16()

bool armnn::IsQSymmS16 ( const WorkloadInfo & info )

Definition at line 64 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::QSymmS16>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS8()

bool armnn::IsQSymmS8 ( const WorkloadInfo & info )

Definition at line 68 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::QSymmS8>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQuantized8BitType()

constexpr bool armnn::IsQuantized8BitType ( DataType dataType )

constexpr

Definition at line 316 of file TypesUtils.hpp.

 {
     // True exactly when dataType is one of QAsymmU8, QAsymmS8 or QSymmS8.
     // Kept as a single return expression (the function is declared constexpr).
     return dataType == DataType::QAsymmU8        ||
            dataType == DataType::QAsymmS8        ||
            dataType == DataType::QSymmS8;
 }

References QAsymmS8, QAsymmU8, and QSymmS8.

Referenced by RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), IsQuantizedType(), and RefLayerSupport::IsTransposeConvolution2dSupported().

◆ IsQuantizedType() [1/2]

constexpr bool armnn::IsQuantizedType ( )

constexpr

Definition at line 311 of file TypesUtils.hpp.

 {
     // Compile-time overload: the result is std::is_integral<T>::value, i.e. any
     // integral T is reported as quantized.
     // NOTE(review): T is presumably the function's template parameter; the template
     // header is not shown in this listing - confirm in TypesUtils.hpp.
     return std::is_integral<T>::value;
 }

Referenced by ClMultiplicationWorkload::ClMultiplicationWorkload(), RefWorkloadFactory::CreateWorkload(), TensorInfo::IsQuantized(), NeonMultiplicationWorkload::NeonMultiplicationWorkload(), and QuantizeQueueDescriptor::Validate().

◆ IsQuantizedType() [2/2]

constexpr bool armnn::IsQuantizedType ( DataType dataType )

constexpr

Definition at line 323 of file TypesUtils.hpp.

 {
     // True for QSymmS16, or for any data type accepted by IsQuantized8BitType().
     return dataType == DataType::QSymmS16 || IsQuantized8BitType(dataType);
 }

References IsQuantized8BitType(), and QSymmS16.

◆ IsReadyForSplitAssignment()

bool armnn::IsReadyForSplitAssignment	(	LayerSelectionInfo::LayerInfoContainer &	layerInfos,
		LayerSelectionInfo &	layerInfo
	)

Definition at line 374 of file SubgraphViewSelector.cpp.

 {
     bool ready = true;
     ForEachLayerInput(layerInfos, layerInfo,
                       [&ready](LayerSelectionInfo& parentInfo)
                           {
                               if (!parentInfo.m_IsProcessed)
                               {
                                   ready = false;
                               }
                           });
     return ready;
 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ IsSequenceLayerType() [1/2]

bool armnn::IsSequenceLayerType	(	Layer &	layer,
		BinaryOperation	type
	)

inline

Definition at line 367 of file SubgraphUtils.hpp.

 {
     return (layer.GetType() == LayerType::ElementwiseBinary) &&
             (PolymorphicDowncast<ElementwiseBinaryLayer*>(&layer)->GetParameters().m_Operation == type);
 }

References ElementwiseBinary, and Layer::GetType().

◆ IsSequenceLayerType() [2/2]

bool armnn::IsSequenceLayerType	(	Layer &	layer,
		LayerType	type
	)

inline

Definition at line 362 of file SubgraphUtils.hpp.

 {
     return layer.GetType() == type;
 }

References Layer::GetType().

Referenced by BuildAddMulAddTensorInfoLists().

◆ IsSigned32()

bool armnn::IsSigned32 ( const WorkloadInfo & info )

Definition at line 52 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::Signed32>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSigned64()

bool armnn::IsSigned64 ( const WorkloadInfo & info )

Definition at line 48 of file RefWorkloadFactory.cpp.

 {
     return IsDataType<DataType::Signed64>(info);
 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSupportedForDataTypeGeneric()

bool armnn::IsSupportedForDataTypeGeneric	(	Optional< std::string & >	reasonIfUnsupported,
		DataType	dataType,
		Float16Func	float16FuncPtr,
		Float32Func	float32FuncPtr,
		Uint8Func	uint8FuncPtr,
		Int32Func	int32FuncPtr,
		BooleanFunc	booleanFuncPtr,
		Params &&...	params
	)

Definition at line 27 of file LayerSupportCommon.hpp.

 {
     switch(dataType)
     {
         case DataType::Float16:
             return float16FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
         case DataType::Float32:
             return float32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
         case DataType::QAsymmU8:
             return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
         case DataType::Signed32:
             return int32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
         case DataType::Boolean:
             return booleanFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
         default:
             return false;
     }
 }

References Boolean, Float16, Float32, QAsymmU8, and Signed32.

Referenced by RefLayerSupport::IsConvertFp16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToFp16Supported(), and NeonLayerSupport::IsFloorSupported().

◆ LayerEnumOf() [1/78]

constexpr LayerType armnn::LayerEnumOf ( const ActivationLayer * )

constexpr

Definition at line 114 of file LayersFwd.hpp.

◆ LayerEnumOf() [2/78]

constexpr LayerType armnn::LayerEnumOf ( const AdditionLayer * )

constexpr

Definition at line 115 of file LayersFwd.hpp.

◆ LayerEnumOf() [3/78]

constexpr LayerType armnn::LayerEnumOf ( const ArgMinMaxLayer * )

constexpr

Definition at line 116 of file LayersFwd.hpp.

◆ LayerEnumOf() [4/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchMatMulLayer * )

constexpr

Definition at line 117 of file LayersFwd.hpp.

◆ LayerEnumOf() [5/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchNormalizationLayer * )

constexpr

Definition at line 118 of file LayersFwd.hpp.

◆ LayerEnumOf() [6/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchToSpaceNdLayer * )

constexpr

Definition at line 119 of file LayersFwd.hpp.

◆ LayerEnumOf() [7/78]

constexpr LayerType armnn::LayerEnumOf ( const BroadcastToLayer * )

constexpr

Definition at line 120 of file LayersFwd.hpp.

◆ LayerEnumOf() [8/78]

constexpr LayerType armnn::LayerEnumOf ( const CastLayer * )

constexpr

Definition at line 121 of file LayersFwd.hpp.

◆ LayerEnumOf() [9/78]

constexpr LayerType armnn::LayerEnumOf ( const ChannelShuffleLayer * )

constexpr

Definition at line 122 of file LayersFwd.hpp.

◆ LayerEnumOf() [10/78]

constexpr LayerType armnn::LayerEnumOf ( const ComparisonLayer * )

constexpr

Definition at line 123 of file LayersFwd.hpp.

◆ LayerEnumOf() [11/78]

constexpr LayerType armnn::LayerEnumOf ( const ConcatLayer * )

constexpr

Definition at line 124 of file LayersFwd.hpp.

◆ LayerEnumOf() [12/78]

constexpr LayerType armnn::LayerEnumOf ( const ConstantLayer * )

constexpr

Definition at line 125 of file LayersFwd.hpp.

◆ LayerEnumOf() [13/78]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp16ToFp32Layer * )

constexpr

Definition at line 126 of file LayersFwd.hpp.

◆ LayerEnumOf() [14/78]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToFp16Layer * )

constexpr

Definition at line 127 of file LayersFwd.hpp.

◆ LayerEnumOf() [15/78]

constexpr LayerType armnn::LayerEnumOf ( const Convolution2dLayer * )

constexpr

Definition at line 128 of file LayersFwd.hpp.

◆ LayerEnumOf() [16/78]

constexpr LayerType armnn::LayerEnumOf ( const Convolution3dLayer * )

constexpr

Definition at line 129 of file LayersFwd.hpp.

◆ LayerEnumOf() [17/78]

constexpr LayerType armnn::LayerEnumOf ( const DebugLayer * )

constexpr

Definition at line 130 of file LayersFwd.hpp.

◆ LayerEnumOf() [18/78]

constexpr LayerType armnn::LayerEnumOf ( const DepthToSpaceLayer * )

constexpr

Definition at line 131 of file LayersFwd.hpp.

◆ LayerEnumOf() [19/78]

constexpr LayerType armnn::LayerEnumOf ( const DepthwiseConvolution2dLayer * )

constexpr

Definition at line 132 of file LayersFwd.hpp.

◆ LayerEnumOf() [20/78]

constexpr LayerType armnn::LayerEnumOf ( const DequantizeLayer * )

constexpr

Definition at line 133 of file LayersFwd.hpp.

◆ LayerEnumOf() [21/78]

constexpr LayerType armnn::LayerEnumOf ( const DetectionPostProcessLayer * )

constexpr

Definition at line 134 of file LayersFwd.hpp.

◆ LayerEnumOf() [22/78]

constexpr LayerType armnn::LayerEnumOf ( const DivisionLayer * )

constexpr

Definition at line 135 of file LayersFwd.hpp.

◆ LayerEnumOf() [23/78]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseBinaryLayer * )

constexpr

Definition at line 136 of file LayersFwd.hpp.

◆ LayerEnumOf() [24/78]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseUnaryLayer * )

constexpr

Definition at line 137 of file LayersFwd.hpp.

◆ LayerEnumOf() [25/78]

constexpr LayerType armnn::LayerEnumOf ( const FakeQuantizationLayer * )

constexpr

Definition at line 138 of file LayersFwd.hpp.

◆ LayerEnumOf() [26/78]

constexpr LayerType armnn::LayerEnumOf ( const FillLayer * )

constexpr

Definition at line 139 of file LayersFwd.hpp.

◆ LayerEnumOf() [27/78]

constexpr LayerType armnn::LayerEnumOf ( const FloorLayer * )

constexpr

Definition at line 140 of file LayersFwd.hpp.

◆ LayerEnumOf() [28/78]

constexpr LayerType armnn::LayerEnumOf ( const FullyConnectedLayer * )

constexpr

Definition at line 141 of file LayersFwd.hpp.

◆ LayerEnumOf() [29/78]

constexpr LayerType armnn::LayerEnumOf ( const FusedLayer * )

constexpr

Definition at line 142 of file LayersFwd.hpp.

◆ LayerEnumOf() [30/78]

constexpr LayerType armnn::LayerEnumOf ( const GatherLayer * )

constexpr

Definition at line 143 of file LayersFwd.hpp.

◆ LayerEnumOf() [31/78]

constexpr LayerType armnn::LayerEnumOf ( const GatherNdLayer * )

constexpr

Definition at line 144 of file LayersFwd.hpp.

◆ LayerEnumOf() [32/78]

constexpr LayerType armnn::LayerEnumOf ( const InputLayer * )

constexpr

Definition at line 145 of file LayersFwd.hpp.

◆ LayerEnumOf() [33/78]

constexpr LayerType armnn::LayerEnumOf ( const InstanceNormalizationLayer * )

constexpr

Definition at line 146 of file LayersFwd.hpp.

◆ LayerEnumOf() [34/78]

constexpr LayerType armnn::LayerEnumOf ( const L2NormalizationLayer * )

constexpr

Definition at line 147 of file LayersFwd.hpp.

◆ LayerEnumOf() [35/78]

constexpr LayerType armnn::LayerEnumOf ( const LogicalBinaryLayer * )

constexpr

Definition at line 148 of file LayersFwd.hpp.

◆ LayerEnumOf() [36/78]

constexpr LayerType armnn::LayerEnumOf ( const LogSoftmaxLayer * )

constexpr

Definition at line 149 of file LayersFwd.hpp.

◆ LayerEnumOf() [37/78]

constexpr LayerType armnn::LayerEnumOf ( const LstmLayer * )

constexpr

Definition at line 150 of file LayersFwd.hpp.

◆ LayerEnumOf() [38/78]

constexpr LayerType armnn::LayerEnumOf ( const MapLayer * )

constexpr

Definition at line 151 of file LayersFwd.hpp.

◆ LayerEnumOf() [39/78]

constexpr LayerType armnn::LayerEnumOf ( const MaximumLayer * )

constexpr

Definition at line 152 of file LayersFwd.hpp.

◆ LayerEnumOf() [40/78]

constexpr LayerType armnn::LayerEnumOf ( const MeanLayer * )

constexpr

Definition at line 153 of file LayersFwd.hpp.

◆ LayerEnumOf() [41/78]

constexpr LayerType armnn::LayerEnumOf ( const MemCopyLayer * )

constexpr

Definition at line 154 of file LayersFwd.hpp.

◆ LayerEnumOf() [42/78]

constexpr LayerType armnn::LayerEnumOf ( const MemImportLayer * )

constexpr

Definition at line 155 of file LayersFwd.hpp.

◆ LayerEnumOf() [43/78]

constexpr LayerType armnn::LayerEnumOf ( const MergeLayer * )

constexpr

Definition at line 156 of file LayersFwd.hpp.

◆ LayerEnumOf() [44/78]

constexpr LayerType armnn::LayerEnumOf ( const MinimumLayer * )

constexpr

Definition at line 157 of file LayersFwd.hpp.

◆ LayerEnumOf() [45/78]

constexpr LayerType armnn::LayerEnumOf ( const MultiplicationLayer * )

constexpr

Definition at line 158 of file LayersFwd.hpp.

◆ LayerEnumOf() [46/78]

constexpr LayerType armnn::LayerEnumOf ( const NormalizationLayer * )

constexpr

Definition at line 159 of file LayersFwd.hpp.

◆ LayerEnumOf() [47/78]

constexpr LayerType armnn::LayerEnumOf ( const OutputLayer * )

constexpr

Definition at line 160 of file LayersFwd.hpp.

◆ LayerEnumOf() [48/78]

constexpr LayerType armnn::LayerEnumOf ( const PadLayer * )

constexpr

Definition at line 161 of file LayersFwd.hpp.

◆ LayerEnumOf() [49/78]

constexpr LayerType armnn::LayerEnumOf ( const PermuteLayer * )

constexpr

Definition at line 162 of file LayersFwd.hpp.

◆ LayerEnumOf() [50/78]

constexpr LayerType armnn::LayerEnumOf ( const Pooling2dLayer * )

constexpr

Definition at line 163 of file LayersFwd.hpp.

◆ LayerEnumOf() [51/78]

constexpr LayerType armnn::LayerEnumOf ( const Pooling3dLayer * )

constexpr

Definition at line 164 of file LayersFwd.hpp.

◆ LayerEnumOf() [52/78]

constexpr LayerType armnn::LayerEnumOf ( const PreCompiledLayer * )

constexpr

Definition at line 165 of file LayersFwd.hpp.

◆ LayerEnumOf() [53/78]

constexpr LayerType armnn::LayerEnumOf ( const PreluLayer * )

constexpr

Definition at line 166 of file LayersFwd.hpp.

◆ LayerEnumOf() [54/78]

constexpr LayerType armnn::LayerEnumOf ( const QLstmLayer * )

constexpr

Definition at line 168 of file LayersFwd.hpp.

◆ LayerEnumOf() [55/78]

constexpr LayerType armnn::LayerEnumOf ( const QuantizedLstmLayer * )

constexpr

Definition at line 169 of file LayersFwd.hpp.

◆ LayerEnumOf() [56/78]

constexpr LayerType armnn::LayerEnumOf ( const QuantizeLayer * )

constexpr

Definition at line 167 of file LayersFwd.hpp.

◆ LayerEnumOf() [57/78]

constexpr LayerType armnn::LayerEnumOf ( const RankLayer * )

constexpr

Definition at line 170 of file LayersFwd.hpp.

◆ LayerEnumOf() [58/78]

constexpr LayerType armnn::LayerEnumOf ( const ReduceLayer * )

constexpr

Definition at line 171 of file LayersFwd.hpp.

◆ LayerEnumOf() [59/78]

constexpr LayerType armnn::LayerEnumOf ( const ReshapeLayer * )

constexpr

Definition at line 172 of file LayersFwd.hpp.

◆ LayerEnumOf() [60/78]

constexpr LayerType armnn::LayerEnumOf ( const ResizeLayer * )

constexpr

Definition at line 173 of file LayersFwd.hpp.

◆ LayerEnumOf() [61/78]

constexpr LayerType armnn::LayerEnumOf ( const ReverseV2Layer * )

constexpr

Definition at line 174 of file LayersFwd.hpp.

◆ LayerEnumOf() [62/78]

constexpr LayerType armnn::LayerEnumOf ( const ShapeLayer * )

constexpr

Definition at line 175 of file LayersFwd.hpp.

◆ LayerEnumOf() [63/78]

constexpr LayerType armnn::LayerEnumOf ( const SliceLayer * )

constexpr

Definition at line 176 of file LayersFwd.hpp.

◆ LayerEnumOf() [64/78]

constexpr LayerType armnn::LayerEnumOf ( const SoftmaxLayer * )

constexpr

Definition at line 177 of file LayersFwd.hpp.

◆ LayerEnumOf() [65/78]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToBatchNdLayer * )

constexpr

Definition at line 178 of file LayersFwd.hpp.

◆ LayerEnumOf() [66/78]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToDepthLayer * )

constexpr

Definition at line 179 of file LayersFwd.hpp.

◆ LayerEnumOf() [67/78]

constexpr LayerType armnn::LayerEnumOf ( const SplitterLayer * )

constexpr

Definition at line 180 of file LayersFwd.hpp.

◆ LayerEnumOf() [68/78]

constexpr LayerType armnn::LayerEnumOf ( const StackLayer * )

constexpr

Definition at line 181 of file LayersFwd.hpp.

◆ LayerEnumOf() [69/78]

constexpr LayerType armnn::LayerEnumOf ( const StandInLayer * )

constexpr

Definition at line 182 of file LayersFwd.hpp.

◆ LayerEnumOf() [70/78]

constexpr LayerType armnn::LayerEnumOf ( const StridedSliceLayer * )

constexpr

Definition at line 183 of file LayersFwd.hpp.

◆ LayerEnumOf() [71/78]

constexpr LayerType armnn::LayerEnumOf ( const SubtractionLayer * )

constexpr

Definition at line 184 of file LayersFwd.hpp.

◆ LayerEnumOf() [72/78]

constexpr LayerType armnn::LayerEnumOf ( const SwitchLayer * )

constexpr

Definition at line 185 of file LayersFwd.hpp.

◆ LayerEnumOf() [73/78]

constexpr LayerType armnn::LayerEnumOf ( const T * = nullptr )

constexpr

◆ LayerEnumOf() [74/78]

constexpr LayerType armnn::LayerEnumOf ( const TileLayer * )

constexpr

Definition at line 186 of file LayersFwd.hpp.

◆ LayerEnumOf() [75/78]

constexpr LayerType armnn::LayerEnumOf ( const TransposeConvolution2dLayer * )

constexpr

Definition at line 188 of file LayersFwd.hpp.

◆ LayerEnumOf() [76/78]

constexpr LayerType armnn::LayerEnumOf ( const TransposeLayer * )

constexpr

Definition at line 187 of file LayersFwd.hpp.

◆ LayerEnumOf() [77/78]

constexpr LayerType armnn::LayerEnumOf ( const UnidirectionalSequenceLstmLayer * )

constexpr

Definition at line 189 of file LayersFwd.hpp.

◆ LayerEnumOf() [78/78]

constexpr LayerType armnn::LayerEnumOf ( const UnmapLayer * )

constexpr

Definition at line 190 of file LayersFwd.hpp.

◆ LevelToString()

std::string armnn::LevelToString ( LogSeverity level )

inline

Definition at line 22 of file Logging.hpp.

 {
     switch(level)
     {
         case LogSeverity::Trace:
             return "Trace";
         case LogSeverity::Debug:
             return "Debug";
         case LogSeverity::Info:
             return "Info";
         case LogSeverity::Warning:
             return "Warning";
         case LogSeverity::Error:
             return "Error";
         case LogSeverity::Fatal:
             return "Fatal";
         default:
             return "Log";
     }
 }

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by ScopedRecord::ScopedRecord().

◆ LogSoftmax()

void LogSoftmax	(	Decoder< float > &	input,
		Encoder< float > &	output,
		const TensorInfo &	inputInfo,
		const LogSoftmaxDescriptor &	descriptor
	)

Definition at line 29 of file LogSoftmax.cpp.

 {
     // Writes the log-softmax of `input`, taken along descriptor.m_Axis, to `output`:
     //     out = (x - max) * beta - log(sum(exp((x - max) * beta)))
     // where max and the sum range over the elements along the chosen axis and
     // beta is descriptor.m_Beta.
     // NOTE(review): the results of input[...] / output[...] are discarded below; the
     // calls appear to select the element that the following Get()/Set() acts on -
     // confirm against the Decoder/Encoder interfaces.
     const unsigned int numDimensions = inputInfo.GetNumDimensions();
  
     bool axisIsValid = ValidateAxis(descriptor.m_Axis, numDimensions);
     ARMNN_ASSERT_MSG(axisIsValid,
         "Axis index is not in range [-numDimensions, numDimensions).");
     // Keeps axisIsValid referenced - presumably for builds where the assert macro expands to nothing.
     IgnoreUnused(axisIsValid);
  
     // Map a negative axis onto its non-negative equivalent: numDimensions - |axis|.
     unsigned int uAxis = descriptor.m_Axis < 0  ?
         numDimensions - armnn::numeric_cast<unsigned int>(std::abs(descriptor.m_Axis)) :
         armnn::numeric_cast<unsigned int>(descriptor.m_Axis);
  
     // Split the shape into [outer | axis | inner] so a flat index can be built as
     // (outer * axisSize + i) * innerSize + inner.
     // NOTE(review): GetNumElementsBetween presumably returns the product of the
     // dimensions in [first, last) - confirm in armnnUtils.
     const TensorShape& inputShape = inputInfo.GetShape();
     const unsigned int outerSize  = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
     const unsigned int axisSize   = inputShape[uAxis];
     const unsigned int innerSize  = armnnUtils::GetNumElementsBetween(inputShape,
                                                                       uAxis + 1,
                                                                       inputShape.GetNumDimensions());
  
     for (unsigned int outer = 0; outer < outerSize; ++outer)
     {
         for (unsigned int inner = 0; inner < innerSize; ++inner)
         {
             // Find max
             // (seeded with the element at i == 0, so the scan below starts at i == 1)
             input[outer * axisSize * innerSize + inner];
             float maxValue = input.Get();
             for (unsigned int i = 1u; i < axisSize; ++i)
             {
                 input[(outer * axisSize + i) * innerSize + inner];
                 maxValue = std::max(maxValue, input.Get());
             }
  
             // Compute sum
             // (of exp((x - max) * beta); x - max is never positive)
             float sum = 0.0f;
             for (unsigned int i = 0u; i < axisSize; ++i)
             {
                 input[(outer * axisSize + i) * innerSize + inner];
                 sum += std::exp((input.Get() - maxValue) * descriptor.m_Beta);
             }
  
             // Compute log sum
             const float logSum = std::log(sum);
  
             // Compute result
             for (unsigned int i = 0u; i < axisSize; ++i)
             {
                 const unsigned int index = (outer * axisSize + i) * innerSize + inner;
  
                 // Address the same flat index on both the decoder and the encoder.
                 input [index];
                 output[index];
  
                 output.Set((input.Get() - maxValue) * descriptor.m_Beta - logSum);
             }
         }
     }
 }

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), IgnoreUnused(), SoftmaxDescriptor::m_Axis, SoftmaxDescriptor::m_Beta, and Encoder< IType >::Set().

◆ LstmImpl()

void LstmImpl	(	const LstmDescriptor &	descriptor,
		const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const TensorShape &	inputToOutputWeightsShape,
		const TensorShape &	recurrentToOutputWeightsShape,
		std::unique_ptr< Decoder< float >> &	inputData,
		std::unique_ptr< Decoder< float >> &	outputStateIn,
		std::unique_ptr< Decoder< float >> &	cellStateIn,
		std::unique_ptr< Encoder< float >> &	outputStateOut,
		std::unique_ptr< Encoder< float >> &	cellStateOut,
		std::unique_ptr< Encoder< float >> &	output,
		std::unique_ptr< Decoder< float >> &	cellStateOutDecoder,
		std::unique_ptr< Decoder< float >> &	outputDecoder,
		std::unique_ptr< Decoder< float >> &	inputToInputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	inputToForgetWeightsTensor,
		std::unique_ptr< Decoder< float >> &	inputToCellWeightsTensor,
		std::unique_ptr< Decoder< float >> &	inputToOutputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	recurrentToInputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	recurrentToForgetWeightsTensor,
		std::unique_ptr< Decoder< float >> &	recurrentToCellWeightsTensor,
		std::unique_ptr< Decoder< float >> &	recurrentToOutputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	cellToInputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	cellToForgetWeightsTensor,
		std::unique_ptr< Decoder< float >> &	cellToOutputWeightsTensor,
		std::unique_ptr< Decoder< float >> &	inputGateBiasTensor,
		std::unique_ptr< Decoder< float >> &	forgetGateBiasTensor,
		std::unique_ptr< Decoder< float >> &	cellBiasTensor,
		std::unique_ptr< Decoder< float >> &	outputGateBiasTensor,
		std::unique_ptr< Decoder< float >> &	projectionWeightsTensor,
		std::unique_ptr< Decoder< float >> &	projectionBiasTensor,
		std::unique_ptr< Decoder< float >> &	inputLayerNormWeights,
		std::unique_ptr< Decoder< float >> &	forgetLayerNormWeights,
		std::unique_ptr< Decoder< float >> &	cellLayerNormWeights,
		std::unique_ptr< Decoder< float >> &	outputLayerNormWeights,
		std::unique_ptr< Encoder< float >> &	inputGateScratch,
		std::unique_ptr< Encoder< float >> &	cellScratch,
		std::unique_ptr< Encoder< float >> &	forgetGateScratch,
		std::unique_ptr< Encoder< float >> &	outputGateScratch,
		std::unique_ptr< Decoder< float >> &	inputGateScratchDecoder,
		std::unique_ptr< Decoder< float >> &	cellScratchDecoder,
		std::unique_ptr< Decoder< float >> &	forgetGateScratchDecoder,
		std::unique_ptr< Decoder< float >> &	outputGateScratchDecoder,
		float	layerNormEpsilon
	)

Definition at line 13 of file Lstm.cpp.

 {
     // This is a port of the LSTM::Eval() method in the Android code base
     // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
     //
     // Runs the LSTM gate computations once over the whole batch (there is no
     // time-step loop in this function). It reads inputData, outputStateIn and
     // cellStateIn, and writes cellStateOut, output and outputStateOut.
     // Optional behaviour is selected by the descriptor:
     //   m_CifgEnabled       - the input gate is not computed; the forget gate, after
     //                         Sub1Vector, is used in its place for the cell update.
     //   m_PeepholeEnabled   - cell-state terms are accumulated into the gates.
     //   m_LayerNormEnabled  - gates are normalised, scaled by the layer-norm weights
     //                         and the bias is added afterwards instead of up front.
     //   m_ProjectionEnabled - the gated output is multiplied by the projection weights.
     // NOTE(review): each *Scratch encoder and its matching *ScratchDecoder (and
     // cellStateOut/cellStateOutDecoder, output/outputDecoder) presumably view the
     // same buffer, so values written through the encoder are read back through the
     // decoder - confirm at the call site. Statement order matters for that reason.
  
     const TensorShape& inputShape = inputInfo.GetShape();
     const DataType& outputType = outputInfo.GetDataType();
  
     // Sizes are read from the input shape and from the two weight shapes passed in.
     const uint32_t nBatch = inputShape[0];
     const uint32_t nInput = inputShape[1];
  
     const uint32_t nCell   = inputToOutputWeightsShape[0];
     const uint32_t nOutput = recurrentToOutputWeightsShape[1];
  
     const bool useCifg      = descriptor.m_CifgEnabled;
     const bool usePeephole  = descriptor.m_PeepholeEnabled;
     const bool useLayerNorm = descriptor.m_LayerNormEnabled;
  
     if (!useLayerNorm)
     {
         // Initialize scratch buffers with bias.
         if (!useCifg)
         {
             VectorBatchVectorAssign(*inputGateBiasTensor,
                                     nCell, nBatch, *inputGateScratch);
         }
         VectorBatchVectorAssign(*forgetGateBiasTensor,
                                 nCell, nBatch, *forgetGateScratch);
         VectorBatchVectorAssign(*cellBiasTensor,
                                 nCell, nBatch, *cellScratch);
         VectorBatchVectorAssign(*outputGateBiasTensor,
                                 nCell, nBatch, *outputGateScratch);
     }
     else
     {
         // Initialize scratch buffers with zeroes.
         // (with layer norm the bias is added later, after normalisation)
         if (!useCifg)
         {
             ZeroVector(*inputGateScratch, nCell * nBatch);
         }
         ZeroVector(*forgetGateScratch, nCell * nBatch);
         ZeroVector(*cellScratch      , nCell * nBatch);
         ZeroVector(*outputGateScratch, nCell * nBatch);
     }
  
     // For each batch and cell: compute input_weight * input.
     if (!useCifg)
     {
         MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsTensor,
                                             nCell, nInput, *inputData, nBatch, *inputGateScratch);
     }
     MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsTensor,
                                         nCell, nInput, *inputData, nBatch, *forgetGateScratch);
     MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsTensor,
                                         nCell, nInput, *inputData, nBatch, *cellScratch);
     MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsTensor,
                                         nCell, nInput, *inputData, nBatch, *outputGateScratch);
  
     // For each batch and cell: compute recurrent_weight * output_state.
     if (!useCifg)
     {
         MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsTensor,
                                             nCell, nOutput, *outputStateIn, nBatch, *inputGateScratch);
     }
     MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsTensor,
                                         nCell, nOutput, *outputStateIn, nBatch, *forgetGateScratch);
     MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsTensor,
                                         nCell, nOutput, *outputStateIn, nBatch, *cellScratch);
     MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsTensor,
                                         nCell, nOutput, *outputStateIn, nBatch, *outputGateScratch);
  
     // For each batch and cell: update input gate.
     if (!useCifg)
     {
         if (usePeephole)
         {
             VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
                                                     nCell, *cellStateIn, nBatch, *inputGateScratch);
         }
         if (useLayerNorm)
         {
             // Normalise, scale by the layer-norm weights, then add the gate bias.
             MeanStddevNormalization(*inputGateScratchDecoder,
                                     *inputGateScratch, nCell, nBatch, layerNormEpsilon);
             VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
                                           nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
             VectorBatchVectorAdd(*inputGateBiasTensor,
                                  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
         }
         Activation(*inputGateScratchDecoder, *inputGateScratch,
                    TensorInfo({nCell, nBatch}, outputType),
                    ActivationFunction::Sigmoid, 0, 0);
     }
  
     // For each batch and cell: update forget gate.
     if (usePeephole)
     {
         VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
                                                 *cellStateIn, nBatch, *forgetGateScratch);
     }
     if (useLayerNorm)
     {
         MeanStddevNormalization(*forgetGateScratchDecoder,
                                 *forgetGateScratch, nCell, nBatch, layerNormEpsilon);
         VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
                                       nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
         VectorBatchVectorAdd(*forgetGateBiasTensor,
                              nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
     }
     Activation(*forgetGateScratchDecoder, *forgetGateScratch,
                TensorInfo({nCell, nBatch}, outputType),
                ActivationFunction::Sigmoid, 0, 0);
  
     // For each batch and cell: update the cell.
     if (useLayerNorm)
     {
         MeanStddevNormalization(*cellScratchDecoder,
                                 *cellScratch, nCell, nBatch, layerNormEpsilon);
         VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
                                       nCell, *cellScratchDecoder, nBatch, *cellScratch);
         VectorBatchVectorAdd(*cellBiasTensor,
                              nCell, *cellScratchDecoder, nBatch, *cellScratch);
     }
  
     // cellStateOut = forget gate * previous cell state (element-wise).
     VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
  
     // Sigmoid with a = b = 0 is only the starting value; SetActivationParameters is
     // handed all three by name and presumably overwrites them from m_ActivationFunc.
     ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
     float a = 0;
     float b = 0;
     SetActivationParameters(descriptor.m_ActivationFunc, armnnActivationFunc, a, b);
  
     // An activation code of 0 (or less) skips the cell activation entirely.
     if (descriptor.m_ActivationFunc > 0)
     {
         Activation(*cellScratchDecoder, *cellScratch,
                    TensorInfo({nCell, nBatch}, outputType),
                    armnnActivationFunc, a, b);
     }
     if (useCifg)
     {
         // No separate input gate: transform the forget gate with Sub1Vector
         // (presumably 1 - f) and use that as the input-gate factor.
         Sub1Vector(*forgetGateScratchDecoder, nBatch * nCell, *forgetGateScratch);
         VectorVectorCwiseProductAccumulate(
             *cellScratchDecoder, *forgetGateScratchDecoder, nBatch * nCell, *cellStateOut);
     }
     else
     {
         VectorVectorCwiseProductAccumulate(
             *cellScratchDecoder, *inputGateScratchDecoder, nBatch * nCell, *cellStateOut);
     }
     // Clipping is applied only for a strictly positive threshold.
     if (descriptor.m_ClippingThresCell > 0.0)
     {
         ClipVector(*cellStateOutDecoder, nBatch * nCell, descriptor.m_ClippingThresCell, *cellStateOut);
     }
  
     // For each batch and cell: update the output gate.
     if (usePeephole)
     {
         // Unlike the input/forget gates, this peephole term reads the updated cell state.
         VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
                                                 nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
     }
     if (useLayerNorm)
     {
         MeanStddevNormalization(*outputGateScratchDecoder,
                                 *outputGateScratch, nCell, nBatch, layerNormEpsilon);
         VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
                                       nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
         VectorBatchVectorAdd(*outputGateBiasTensor,
                              nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
     }
     Activation(*outputGateScratchDecoder, *outputGateScratch,
                TensorInfo({nCell, nBatch}, outputType),
                ActivationFunction::Sigmoid, 0, 0);
  
     // Activate the new cell state into cellScratch (cellScratch is reused as a
     // temporary here), leaving cellStateOut itself untouched.
     if (descriptor.m_ActivationFunc > 0)
     {
         Activation(*cellStateOutDecoder, *cellScratch,
                    TensorInfo({nCell, nBatch}, outputType),
                    armnnActivationFunc, a, b);
     }
  
     // outputGateScratch = output gate * contents of cellScratch (element-wise).
     VectorVectorCwiseProduct(*outputGateScratchDecoder, *cellScratchDecoder, nBatch * nCell, *outputGateScratch);
  
     // For each batch: update the projection and output_state.
     if (descriptor.m_ProjectionEnabled)
     {
         // The projection bias is optional; the unique_ptr is null when it is absent.
         if (projectionBiasTensor)
         {
             VectorBatchVectorAssign(*projectionBiasTensor,
                                     nOutput, nBatch, *output);
         }
         MatrixBatchVectorMultiplyAccumulate(*projectionWeightsTensor,
                                             nOutput, nCell, *outputGateScratchDecoder, nBatch, *output);
  
         if (descriptor.m_ClippingThresProj > 0.0)
         {
             ClipVector(*outputDecoder, nBatch * nOutput, descriptor.m_ClippingThresProj, *output);
         }
     }
     else
     {
         // Without projection the gated result is copied to the output as is.
         CopyVector(*outputGateScratchDecoder, nBatch * nOutput, *output);
     }
  
     // The output state handed back is a copy of the output just produced.
     CopyVector(*outputDecoder, nBatch * nOutput, *outputStateOut);
 }

References Activation(), ClipVector(), CopyVector(), TensorInfo::GetDataType(), TensorInfo::GetShape(), LstmDescriptor::m_ActivationFunc, LstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, LstmDescriptor::m_LayerNormEnabled, LstmDescriptor::m_PeepholeEnabled, LstmDescriptor::m_ProjectionEnabled, MatrixBatchVectorMultiplyAccumulate(), MeanStddevNormalization(), SetActivationParameters(), Sigmoid, Sub1Vector(), VectorBatchVectorAdd(), VectorBatchVectorAssign(), VectorBatchVectorCwiseProduct(), VectorBatchVectorCwiseProductAccumulate(), VectorVectorCwiseProduct(), VectorVectorCwiseProductAccumulate(), and ZeroVector().

◆ MakeDecoder() [1/2]

std::unique_ptr< Decoder< int32_t > > MakeDecoder	(	const TensorInfo &	info,
		const void *	data
	)

inline

Definition at line 64 of file Decoders.hpp.

 {
     switch(info.GetDataType())
     {
         case DataType::QAsymmS8:
         {
             return std::make_unique<QASymmS8Decoder>(
                 static_cast<const int8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QAsymmU8:
         {
             return std::make_unique<QASymm8Decoder>(
                 static_cast<const uint8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QSymmS16:
         {
             return std::make_unique<QSymm16Decoder>(
                 static_cast<const int16_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::Float16:
         {
             return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
         }
         case DataType::Float32:
         {
             return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
         }
         case DataType::Signed32:
         {
             return MakeSigned32Decoder(info, data);
         }
         case DataType::QSymmS8:
         {
             if (info.HasPerAxisQuantization())
             {
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
             }
             else
             {
                 return std::make_unique<QSymmS8Decoder>(
                     static_cast<const int8_t*>(data),
                     info.GetQuantizationScale(),
                     info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::Boolean:
         {
             return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
         }
         default:
         {
             throw InvalidArgumentException("Unsupported target Data Type!");
             break;
         }
     }
     return nullptr;
 }

References Boolean, Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeDecoder() [2/2]

std::unique_ptr<Decoder<T> > armnn::MakeDecoder	(	const TensorInfo &	info,
		const void *	data = `nullptr`
	)

inline

Definition at line 64 of file Decoders.hpp.

 {
     switch(info.GetDataType())
     {
         case DataType::QAsymmS8:
         {
             return std::make_unique<QASymmS8Decoder>(
                 static_cast<const int8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QAsymmU8:
         {
             return std::make_unique<QASymm8Decoder>(
                 static_cast<const uint8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QSymmS16:
         {
             return std::make_unique<QSymm16Decoder>(
                 static_cast<const int16_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::Float16:
         {
             return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
         }
         case DataType::Float32:
         {
             return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
         }
         case DataType::Signed32:
         {
             return MakeSigned32Decoder(info, data);
         }
         case DataType::QSymmS8:
         {
             if (info.HasPerAxisQuantization())
             {
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
             }
             else
             {
                 return std::make_unique<QSymmS8Decoder>(
                     static_cast<const int8_t*>(data),
                     info.GetQuantizationScale(),
                     info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::Boolean:
         {
             return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
         }
         default:
         {
             throw InvalidArgumentException("Unsupported target Data Type!");
             break;
         }
     }
     return nullptr;
 }

References Boolean, Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeEncoder() [1/2]

std::unique_ptr< Encoder< int32_t > > MakeEncoder	(	const TensorInfo &	info,
		void *	data
	)

inline

Definition at line 19 of file Encoders.hpp.

 {
     switch(info.GetDataType())
     {
         case armnn::DataType::QAsymmS8:
         {
             return std::make_unique<QASymmS8Encoder>(
                 static_cast<int8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case armnn::DataType::QAsymmU8:
         {
             return std::make_unique<QASymm8Encoder>(
                 static_cast<uint8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QSymmS8:
         {
             if (info.HasPerAxisQuantization())
             {
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm8PerAxisEncoder>(
                         static_cast<int8_t*>(data),
                         params.second,
                         params.first);
             }
             else
             {
                 return std::make_unique<QSymmS8Encoder>(
                         static_cast<int8_t*>(data),
                         info.GetQuantizationScale(),
                         info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::QSymmS16:
         {
             if (info.HasPerAxisQuantization())
             {
                 unsigned int axis = info.GetQuantizationDim().value();
                 auto axisDimensionality = info.GetShape()[axis];
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm16PerAxisEncoder>(
                         static_cast<int16_t*>(data),
                         params.second,
                         params.first,
                         axisDimensionality);
             }
             else
             {
                 return std::make_unique<QSymm16Encoder>(
                         static_cast<int16_t *>(data),
                         info.GetQuantizationScale(),
                         info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::Signed32:
         {
             return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
         }
         case armnn::DataType::Float16:
         {
             return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
         }
         case armnn::DataType::Float32:
         {
             return std::make_unique<Float32Encoder>(static_cast<float*>(data));
         }
         default:
         {
             throw InvalidArgumentException("Unsupported target Data Type!");
             break;
         }
     }
     return nullptr;
 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeEncoder() [2/2]

std::unique_ptr<Encoder<T> > armnn::MakeEncoder	(	const TensorInfo &	info,
		void *	data = `nullptr`
	)

inline

Definition at line 19 of file Encoders.hpp.

 {
     switch(info.GetDataType())
     {
         case armnn::DataType::QAsymmS8:
         {
             return std::make_unique<QASymmS8Encoder>(
                 static_cast<int8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case armnn::DataType::QAsymmU8:
         {
             return std::make_unique<QASymm8Encoder>(
                 static_cast<uint8_t*>(data),
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
         case DataType::QSymmS8:
         {
             if (info.HasPerAxisQuantization())
             {
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm8PerAxisEncoder>(
                         static_cast<int8_t*>(data),
                         params.second,
                         params.first);
             }
             else
             {
                 return std::make_unique<QSymmS8Encoder>(
                         static_cast<int8_t*>(data),
                         info.GetQuantizationScale(),
                         info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::QSymmS16:
         {
             if (info.HasPerAxisQuantization())
             {
                 unsigned int axis = info.GetQuantizationDim().value();
                 auto axisDimensionality = info.GetShape()[axis];
                 std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
                 return std::make_unique<QSymm16PerAxisEncoder>(
                         static_cast<int16_t*>(data),
                         params.second,
                         params.first,
                         axisDimensionality);
             }
             else
             {
                 return std::make_unique<QSymm16Encoder>(
                         static_cast<int16_t *>(data),
                         info.GetQuantizationScale(),
                         info.GetQuantizationOffset());
             }
         }
         case armnn::DataType::Signed32:
         {
             return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
         }
         case armnn::DataType::Float16:
         {
             return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
         }
         case armnn::DataType::Float32:
         {
             return std::make_unique<Float32Encoder>(static_cast<float*>(data));
         }
         default:
         {
             throw InvalidArgumentException("Unsupported target Data Type!");
             break;
         }
     }
     return nullptr;
 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeInfo()

arm_compute::DetectionPostProcessLayerInfo armnn::MakeInfo ( const DetectionPostProcessDescriptor & descriptor )

Definition at line 17 of file NeonDetectionPostProcessWorkload.cpp.

 {
     return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
                                                       descriptor.m_MaxClassesPerDetection,
                                                       descriptor.m_NmsScoreThreshold,
                                                       descriptor.m_NmsIouThreshold,
                                                       descriptor.m_NumClasses,
                                                       { descriptor.m_ScaleX,
                                                         descriptor.m_ScaleY,
                                                         descriptor.m_ScaleW,
                                                         descriptor.m_ScaleH },
                                                       descriptor.m_UseRegularNms,
                                                       descriptor.m_DetectionsPerClass);
 }

References DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, DetectionPostProcessDescriptor::m_ScaleH, DetectionPostProcessDescriptor::m_ScaleW, DetectionPostProcessDescriptor::m_ScaleX, DetectionPostProcessDescriptor::m_ScaleY, and DetectionPostProcessDescriptor::m_UseRegularNms.

Referenced by NeonDetectionPostProcessValidate().

◆ MakeOptimizations()

Optimizer::Optimizations armnn::MakeOptimizations ( Args &&... args )

Definition at line 43 of file Optimizer.hpp.

 {
     Optimizer::Optimizations optimizations;
  
     Append(optimizations, std::forward<Args>(args)...);
  
     return optimizations;
 }

References Append().

Referenced by ApplyBackendOptimizations(), and Optimize().

◆ MakeOptional()

Optional<T> armnn::MakeOptional ( Args &&... args )

Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object.

Definition at line 305 of file Optional.hpp.

 {
     // Construct the Optional<T> directly from the perfectly-forwarded arguments;
     // CONSTRUCT_IN_PLACE is passed as the leading tag argument of the constructor.
     return Optional<T>(CONSTRUCT_IN_PLACE, std::forward<Args>(args)...);
 }

References CONSTRUCT_IN_PLACE.

◆ MakeTransformIterator()

constexpr TransformIterator<Function, Iterator> armnn::MakeTransformIterator	(	Iterator	i,
		Function	f
	)

constexpr

Definition at line 90 of file TransformIterator.hpp.

 {
     // Pair iterator `i` with function `f` in a TransformIterator whose template
     // arguments are the types of the two parameters.
     return TransformIterator<Function, Iterator>(i, f);
 }

◆ MirrorPad()

void MirrorPad	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const ITensorHandle *	inputHandle,
		ITensorHandle *	outputHandle,
		const PadQueueDescriptor &	data
	)

Definition at line 59 of file MirrorPad.cpp.

 {
     auto padList  = data.m_Parameters.m_PadList;
     PaddingMode paddingMode = data.m_Parameters.m_PaddingMode;
  
     TensorShape outputShape = outputInfo.GetShape();
     TensorShape inputShape  = inputInfo.GetShape();
  
     unsigned int numOutputElements = outputInfo.GetNumElements();
     unsigned int numInputDimensions = inputShape.GetNumDimensions();
     assert(numInputDimensions == outputShape.GetNumDimensions());
  
     // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1.
     // If padding mode is Symmetric then both paddings must be no greater than inputShape(i).
     const unsigned int isReflect = static_cast<unsigned int>(paddingMode == PaddingMode::Reflect);
     for(unsigned int i = 0; i < padList.size(); ++i)
     {
         if(padList.at(i).first > (inputShape[i] - isReflect) ||
            padList.at(i).second > (inputShape[i] - isReflect))
         {
             throw armnn::InvalidArgumentException("Paddings must be less (Reflect) or "
                                                   "equal (Symmetric) to the dimension size.");
         }
     }
  
     auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
     auto outData   = MakeEncoder<float>(outputInfo, outputHandle->Map());
  
     Decoder<float>& input  = *inputData;
     Encoder<float>& output = *outData;
  
     for(unsigned int idx = 0; idx < numOutputElements; ++idx)
     {
         // Get the coordinates of the current index in vector form. E.g inx 1 = [0, 0, 0, 1 ]
         const std::vector<unsigned int> coord = IndexToCoord(outputShape, idx);
  
         std::vector<unsigned int> dimensions;
         std::vector<unsigned int> coords;
  
         for(unsigned int i = 0; i < numInputDimensions; ++i)
         {
             dimensions.emplace_back(i);
             coords.emplace_back(coord[i]);
         }
  
         auto isInPadding = [&](unsigned int i)
         {
             return (coords[i] < padList[i].first || coords[i] > inputShape[i] + padList[i].first - 1);
         };
  
         auto getReflectIndex = [&](unsigned int i) -> unsigned int
         {
             if(isInPadding(i))
             {
                 if(coords[i] < padList[i].first)
                 {
                     return padList[i].first - coords[i];
                 }
                 else
                 {
                     return 2 * inputShape[i] + padList[i].first - 2 - coords[i];
                 }
             }
             return coords[i] - padList[i].first;
         };
  
         auto getSymmetricIndex = [&](unsigned int i) -> unsigned int
         {
             if(isInPadding(i))
             {
                 if(coords[i] < padList[i].first)
                 {
                     return padList[i].first - coords[i] - 1;
                 }
                 else
                 {
                     return 2 * inputShape[i] + padList[i].first - 1 - coords[i];
                 }
             }
             return coords[i] - padList[i].first;
         };
  
         // Location of the value in the input tensor to use in the output.
         std::vector<unsigned int> coordOfInput;
  
         // any_of works as a loop here to check if any of the dimensions are in the padding.
         // If dimensions is in the padding area, then create the coordinates of the location in the
         // input tensor to use in the output.
         // E.g.
         // Input tensor = [ 1, 2, 3 ], Rank = 1.
         // Output tensor = [ 2, 1, 2, 3, 1 ] if Reflect or [ 1, 1, 2, 3, 3 ] if Symmetric with a padding of (1, 1).
         // So it will either return [ 1 ] or [ 0 ] which is used to set the first value in the output tensor and so on.
         if(std::any_of(dimensions.begin(), dimensions.end(), isInPadding))
         {
             switch(paddingMode)
             {
                 case PaddingMode::Reflect:
                 {
                     for(unsigned int i = 0; i < numInputDimensions; ++i)
                     {
                         coordOfInput.emplace_back(getReflectIndex(i));
                     }
                     break;
                 }
                 case PaddingMode::Symmetric:
                 {
                     for(unsigned int i = 0; i < numInputDimensions; ++i)
                     {
                         coordOfInput.emplace_back(getSymmetricIndex(i));
                     }
                     break;
                 }
                 default:
                     throw InvalidArgumentException("Padding mode not supported.");
                     break;
             }
         }
         else
         {
             for(unsigned int i = 0; i < numInputDimensions; ++i)
             {
                 coordOfInput.emplace_back(coord[i] - padList[i].first);
             }
         }
  
         // Set output value using the coordinate of the input value to use.
         const unsigned int indexOfInput = CoordToIndex(inputShape, coordOfInput);
  
         input[indexOfInput];
         auto inputValue = input.Get();
  
         output[idx];
         output.Set(inputValue);
     }
 }

References Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PaddingMode, PadDescriptor::m_PadList, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), Reflect, Encoder< IType >::Set(), and Symmetric.

◆ MockTensorHandleFactoryId()

constexpr const char* armnn::MockTensorHandleFactoryId ( )

constexpr

Definition at line 14 of file MockTensorHandleFactory.hpp.

 {
     // Fixed identifier string for the mock tensor handle factory.
     return "Arm/Mock/TensorHandleFactory";
 }

Referenced by MockTensorHandleFactory::GetIdStatic().

◆ NeonAbsWorkloadValidate()

arm_compute::Status NeonAbsWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonAbsWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEAbsLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonActivationWorkloadValidate()

arm_compute::Status NeonActivationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ActivationDescriptor &	descriptor
	)

Definition at line 17 of file NeonActivationWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
  
     return arm_compute::NEActivationLayer::validate(&aclInput,
                                                     &aclOutput,
                                                     activationLayerInfo);
 }

Referenced by NeonLayerSupport::IsActivationSupported().

◆ NeonAdditionWorkloadValidate()

arm_compute::Status NeonAdditionWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 20 of file NeonAdditionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::NEArithmeticAddition::validate(&aclInput0,
                                                        &aclInput1,
                                                        &aclOutput,
                                                        arm_compute::ConvertPolicy::SATURATE,
                                                        activationInfo);
 }

Referenced by NeonLayerSupport::IsAdditionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonArgMinMaxWorkloadValidate()

arm_compute::Status NeonArgMinMaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ArgMinMaxDescriptor &	descriptor
	)

Definition at line 31 of file NeonArgMinMaxWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     auto numDims = input.GetNumDimensions();
     auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
     int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
  
     if (descriptor.m_Function == ArgMinMaxFunction::Max)
     {
         return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
                                                        arm_compute::ReductionOperation::ARG_IDX_MAX);
     }
     else
     {
         return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
                                                        arm_compute::ReductionOperation::ARG_IDX_MIN);
     }
 }

Referenced by NeonLayerSupport::IsArgMinMaxSupported().

◆ NeonBackendId()

constexpr const char* armnn::NeonBackendId ( )

constexpr

Definition at line 10 of file NeonBackendId.hpp.

 { return "CpuAcc"; }

Referenced by NeonBackend::GetIdStatic().

◆ NeonBatchMatMulValidate()

arm_compute::Status NeonBatchMatMulValidate	(	const TensorInfo &	inputInfoX,
		const TensorInfo &	inputInfoY,
		const TensorInfo &	outputInfo,
		const BatchMatMulDescriptor &	descriptor,
		const bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 19 of file NeonBatchMatMulWorkload.cpp.

 {
     if (descriptor.m_AdjointX || descriptor.m_AdjointY )
     {
         throw Exception("Support for adjoint not implemented.");
     }
     if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
     {
         throw Exception("Only supported the MatMul in the last 2 dimensions");
     }
  
     arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
     arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
     arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
  
     // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
     aclInputInfoX.set_are_values_constant(false);
     aclInputInfoY.set_are_values_constant(false);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     arm_compute::MatMulInfo matMulInfo;
     matMulInfo.adj_lhs(descriptor.m_TransposeX);
     matMulInfo.adj_rhs(descriptor.m_TransposeY);
  
     arm_compute::CpuMatMulSettings settings;
     settings.fast_math(isFastMathEnabled);
  
     return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
                                            activationInfo);
 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, BatchMatMulDescriptor::m_TransposeX, BatchMatMulDescriptor::m_TransposeY, and NCHW.

Referenced by NeonLayerSupport::IsBatchMatMulSupported().

◆ NeonBatchNormalizationValidate()

arm_compute::Status NeonBatchNormalizationValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TensorInfo &	mean,
		const TensorInfo &	var,
		const TensorInfo &	beta,
		const TensorInfo &	gamma,
		const BatchNormalizationDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 24 of file NeonBatchNormalizationWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclMeanInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclVarInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclBetaInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclGammaInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
                                                             &aclOutputInfo,
                                                             &aclMeanInfo,
                                                             &aclVarInfo,
                                                             &aclBetaInfo,
                                                             &aclGammaInfo,
                                                             descriptor.m_Eps,
                                                             activationInfo);
 }

Referenced by NeonLayerSupport::IsBatchNormalizationSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonBatchToSpaceNdWorkloadValidate()

arm_compute::Status NeonBatchToSpaceNdWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const BatchToSpaceNdDescriptor &	descriptor
	)

Definition at line 15 of file NeonBatchToSpaceNdWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::Status statusBatchToSpace  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeInput  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
  
     arm_compute::TensorInfo aclReshapeInputInfo  = aclInputInfo;
     arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
  
     // When a spacial dimension is missing (rank=3) set W to 1
     const unsigned int rank = input.GetNumDimensions();
     if (rank == 3)
     {
         const arm_compute::TensorShape inputShape  = aclInputInfo.tensor_shape();
         const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
  
         if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
         {
             // In ACL dimensions are right to left: C, W, H, N
             aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
             aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
         }
         else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
         {
             // In ACL dimensions are right to left: W, H, C, N
             aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
             aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
         }
         else
         {
             throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
         }
  
         statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
         statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
     }
  
     // ArmNN blockShape is [H, W] ACl asks for W, H
     int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
     int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
  
     const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
  
     statusBatchToSpace = arm_compute::NEBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
                                                                     blockWidth,
                                                                     blockHeight,
                                                                     rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
                                                                     cropInfo);
  
     if (statusReshapeInput.error_code()  == arm_compute::ErrorCode::OK &&
         statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
         statusBatchToSpace.error_code()  == arm_compute::ErrorCode::OK)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All BatchToSpace layers validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "BatchToSpace layer validate status failed."
                                    + statusBatchToSpace.error_description()
                                    + statusReshapeInput.error_description()
                                    + statusReshapeOutput.error_description());
     }
 }

Referenced by NeonLayerSupport::IsBatchToSpaceNdSupported().

◆ NeonCastValidate()

arm_compute::Status NeonCastValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 19 of file NeonCastWorkload.cpp.

 {
     arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
 }

Referenced by NeonLayerSupport::IsCastSupported().

◆ NeonChannelShuffleValidate()

arm_compute::Status NeonChannelShuffleValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ChannelShuffleDescriptor &	descriptor
	)

Definition at line 17 of file NeonChannelShuffleWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
     // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
     // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
     arm_compute::DataLayout aclDataLayout;
     if (input.GetNumDimensions() == 4)
     {
         switch (descriptor.m_Axis)
         {
             case 1:
                 aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
                 break;
             case 3:
                 aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
                 break;
             default:
                 return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
         }
         aclInputInfo.set_data_layout(aclDataLayout);
         aclOutputInfo.set_data_layout(aclDataLayout);
         return arm_compute::NEChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
     }
     else
     {
         return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
     }
 }

Referenced by NeonLayerSupport::IsChannelShuffleSupported().

◆ NeonComparisonWorkloadValidate()

arm_compute::Status NeonComparisonWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ComparisonDescriptor &	descriptor
	)

Definition at line 16 of file NeonComparisonWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0 = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
  
     const arm_compute::Status aclStatus = arm_compute::NEElementwiseComparison::validate(&aclInput0,
                                                                                          &aclInput1,
                                                                                          &aclOutput,
                                                                                          comparisonOperation);
     return aclStatus;
 }

Referenced by NeonLayerSupport::IsComparisonSupported().

◆ NeonConcatWorkloadValidate()

arm_compute::Status NeonConcatWorkloadValidate	(	const std::vector< const TensorInfo * > &	inputs,
		const TensorInfo &	output,
		const OriginsDescriptor &	descriptor
	)

Definition at line 27 of file NeonConcatWorkload.cpp.

 {
     std::vector<arm_compute::TensorInfo> aclInputs;
     for (const TensorInfo* input : inputs)
     {
         arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
         aclInputs.emplace_back(aclInputInfo);
     }
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
     std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
     for (arm_compute::ITensorInfo& input : aclInputs)
     {
         aclInputPtrs.emplace_back(&input);
     }
  
     size_t aclAxis = CalcAxis(descriptor);
     return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
 }

Referenced by NeonLayerSupport::IsConcatSupported().

◆ NeonConstantWorkloadValidate()

arm_compute::Status NeonConstantWorkloadValidate ( const TensorInfo & output )

Definition at line 20 of file NeonConstantWorkload.cpp.

 {
     const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     std::array<arm_compute::DataType,9> supportedTypes = {
             arm_compute::DataType::BFLOAT16,
             arm_compute::DataType::F16,
             arm_compute::DataType::F32,
             arm_compute::DataType::QASYMM8,
             arm_compute::DataType::QASYMM8_SIGNED,
             arm_compute::DataType::QSYMM16,
             arm_compute::DataType::QSYMM8,
             arm_compute::DataType::QSYMM8_PER_CHANNEL,
             arm_compute::DataType::S32
     };
     auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
  
     if (it != end(supportedTypes))
     {
         return arm_compute::Status{};
     }
     else
     {
         return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
     }
 }

Referenced by NeonLayerSupport::IsConstantSupported().

◆ NeonConvertFp16ToFp32WorkloadValidate()

arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 19 of file NeonConvertFp16ToFp32Workload.cpp.

 {
     // Fallback to portable software implementation if Compute Library NECast won't work, so
     // this method always returns success
  
     armnn::IgnoreUnused(input);
     armnn::IgnoreUnused(output);
     return arm_compute::Status();
 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp16ToFp32Supported().

◆ NeonConvertFp32ToFp16WorkloadValidate()

arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 21 of file NeonConvertFp32ToFp16Workload.cpp.

 {
     // Fallback to portable software implementation if Compute Library NECast won't work, so
     // this method always returns success
  
     armnn::IgnoreUnused(input);
     armnn::IgnoreUnused(output);
     return arm_compute::Status();
 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp32ToFp16Supported().

◆ NeonConvolution2dWorkloadValidate()

arm_compute::Status NeonConvolution2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Convolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 24 of file NeonConvolution2dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         if (!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN NeonConvolution2dWorkload has empty bias value."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
                                                      &aclWeightsInfo,
                                                      optionalAclBiasesInfo,
                                                      &aclOutputInfo,
                                                      layerInfo,
                                                      arm_compute::WeightsInfo(),
                                                      aclDilationInfo,
                                                      activationInfo,
                                                      isFastMathEnabled);
 }

Referenced by NeonLayerSupport::IsConvolution2dSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonConvolution3dWorkloadValidate()

arm_compute::Status NeonConvolution3dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Convolution3dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		bool	isFastMathEnabled,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 24 of file NeonConvolution3dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         if (!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN NeonConvolution3dWorkload has empty bias value."};
         }
  
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
                                                                     isFastMathEnabled,
                                                                     activationDescriptor);
  
     return arm_compute::NEConv3D::validate(&aclInputInfo,
                                            &aclWeightsInfo,
                                            optionalAclBiasesInfo,
                                            &aclOutputInfo,
                                            aclConv3DInfo);
 }

Referenced by NeonLayerSupport::IsConvolution3dSupported().

◆ NeonDepthToSpaceWorkloadValidate()

arm_compute::Status NeonDepthToSpaceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const DepthToSpaceDescriptor &	descriptor
	)

Definition at line 19 of file NeonDepthToSpaceWorkload.cpp.

 {
     DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
  
     int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
  
     return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsDepthToSpaceSupported().

◆ NeonDepthwiseConvolutionWorkloadValidate()

arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const DepthwiseConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 29 of file NeonDepthwiseConvolutionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo  = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
     //
     // ACL format for weights for depthwise is:
     // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
     // - [1, C, H, W] for [N, C, H, W] input/output layout
     //
     // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
     // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
     // so we do the permute here for the TensorInfo weights.
     unsigned int aclDepthMultiplier;
     TensorInfo weightsPermuted;
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
  
     // Convert the weights into the compute library format
     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         if(!biases.has_value())
         {
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "ArmNN NeonDepthwiseConvolutionWorkload has empty bias value."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
         activationDescriptor);
  
     return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
                                                               &aclWeightsInfo,
                                                               optionalAclBiasesInfo,
                                                               &aclOutputInfo,
                                                               aclPadStrideInfo,
                                                               aclDepthMultiplier,
                                                               activationInfo,
                                                               aclDilationInfo);
 }

Referenced by NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonDequantizeWorkloadValidate()

arm_compute::Status NeonDequantizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 22 of file NeonDequantizeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEDequantizationLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsDequantizeSupported().

◆ NeonDetected()

bool NeonDetected ( )

Definition at line 37 of file Utils.cpp.

 {
 #if !defined(ARMNN_BUILD_BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
     auto hwcaps= getauxval(AT_HWCAP);
 #endif
  
 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__aarch64__)
  
     if (hwcaps & HWCAP_ASIMD)
     {
         // On an arm64 device with Neon.
         return true;
     }
     else
     {
         // On an arm64 device without Neon.
         return false;
     }
  
 #endif
 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__arm__)
  
     if (hwcaps & HWCAP_NEON)
     {
         // On an armhf device with Neon.
         return true;
     }
     else
     {
         // On an armhf device without Neon.
         return false;
     }
  
 #endif
  
     // This method of Neon detection is only supported on Linux so in order to prevent a false negative
     // we will return true in cases where detection did not run.
     return true;
 }

◆ NeonDetectionPostProcessValidate()

arm_compute::Status NeonDetectionPostProcessValidate	(	const TensorInfo &	boxEncodings,
		const TensorInfo &	scores,
		const TensorInfo &	anchors,
		const TensorInfo &	detectionBoxes,
		const TensorInfo &	detectionClasses,
		const TensorInfo &	detectionScores,
		const TensorInfo &	numDetections,
		const DetectionPostProcessDescriptor &	descriptor
	)

Definition at line 32 of file NeonDetectionPostProcessWorkload.cpp.

 {
     arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
  
     const arm_compute::TensorInfo aclBoxEncodings =
         armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
  
     const arm_compute::TensorInfo aclScores =
         armcomputetensorutils::BuildArmComputeTensorInfo(scores);
  
     const arm_compute::TensorInfo aclAnchors =
         armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
  
     arm_compute::TensorInfo aclDetectionBoxes =
         armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
  
     arm_compute::TensorInfo aclDetectionClasses =
         armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
  
     arm_compute::TensorInfo aclDetectionScores =
         armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
  
     arm_compute::TensorInfo aclNumDetections =
         armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
  
     return arm_compute::NEDetectionPostProcessLayer::validate(
             &aclBoxEncodings,
             &aclScores,
             &aclAnchors,
             &aclDetectionBoxes,
             &aclDetectionClasses,
             &aclDetectionScores,
             &aclNumDetections,
             info);
 }

References info, and MakeInfo().

◆ NeonDivisionWorkloadValidate()

arm_compute::Status NeonDivisionWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 18 of file NeonDivisionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::NEElementwiseDivision::validate(&aclInput0,
                                                         &aclInput1,
                                                         &aclOutput,
                                                         activationInfo);
 }

Referenced by NeonLayerSupport::IsDivisionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonElementwiseBinaryWorkloadValidate()

arm_compute::Status NeonElementwiseBinaryWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ElementwiseBinaryDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 20 of file NeonElementwiseBinaryWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     switch (descriptor.m_Operation)
     {
         case armnn::BinaryOperation::Power:
             return arm_compute::NEElementwisePower::validate(&aclInput0,
                                                              &aclInput1,
                                                              &aclOutput,
                                                              activationInfo);
         case armnn::BinaryOperation::SqDiff:
             return arm_compute::NEElementwiseSquaredDiff::validate(&aclInput0,
                                                                    &aclInput1,
                                                                    &aclOutput,
                                                                    activationInfo);
         default:
             throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
     }
 }

Referenced by IsLayerTypeSupported().

◆ NeonExpWorkloadValidate()

arm_compute::Status NeonExpWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonExpWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEExpLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonFullyConnectedWorkloadValidate()

arm_compute::Status NeonFullyConnectedWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases,
		const FullyConnectedDescriptor &	descriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 24 of file NeonFullyConnectedWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
     arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
     aclWeights.set_are_values_constant(weights.IsConstant());
  
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
         aclBiases = BuildArmComputeTensorInfo(biases.value());
         aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
  
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
     return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
                                                         &aclOutput,
                                                         fullyConnectedLayerInfo);
 }

Referenced by NeonLayerSupport::IsFullyConnectedSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonFusedWorkloadValidate()

arm_compute::Status NeonFusedWorkloadValidate	(	const std::vector< std::reference_wrapper< TensorInfo >> &	inputInfos,
		const std::vector< std::reference_wrapper< TensorInfo >> &	outputInfos,
		const FusedDescriptor &	fusedDescriptor,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 22 of file NeonFusedWorkload.cpp.

 {
     std::vector<arm_compute::TensorInfo> actInputInfos;
     actInputInfos.reserve(inputInfos.size());
     for (size_t i = 0u; i < inputInfos.size(); ++i)
     {
         actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
     }
  
     std::vector<arm_compute::TensorInfo> actOutputInfos;
     actOutputInfos.reserve(outputInfos.size());
     for (size_t i = 0u; i < outputInfos.size(); ++i)
     {
         actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
     }
  
     const arm_compute::ActivationLayerInfo activationInfo =
             ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
  
     switch (fusedDescriptor.m_FusedKernelType)
     {
         case FusedKernelType::AddMulAdd:
             return arm_compute::NEAddMulAdd::validate(
                                 &actInputInfos[0],
                                 &actInputInfos[1],
                                 &actInputInfos[2],  // bn_mul
                                 &actInputInfos[3],  // bn_add
                                 actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output
                                 actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output
                                 arm_compute::ConvertPolicy::SATURATE,
                                 activationInfo);
         default:
             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                        "NeonFusedWorkloadValidate: no valid kernel type"};
     }
 }

Referenced by NeonLayerSupport::IsFusedSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonGatherNdWorkloadValidate()

arm_compute::Status NeonGatherNdWorkloadValidate	(	const TensorInfo &	paramsInfo,
		const TensorInfo &	indicesInfo,
		const TensorInfo &	outputInfo
	)

Validates the Mul, ReduceSum, Gather and Reshape layers that make up the GatherNd workload, and returns OK only if all of these layers are valid.

Definition at line 14 of file NeonGatherNdWorkload.cpp.

 {
     // Calculate ND, K, W, C.
     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
  
     /// Validate Mul
     // Indices with shape { W, ND }
     armnn::TensorInfo indices_W_ND_Info = indicesInfo;
     indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
     const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
  
     // Flattened coefficients with shape { ND }
     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
     const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
  
     // Output of Mul with shape { W, ND }
     const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
  
     auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
                                                                       &aclFlattenedCoeffInfo,
                                                                       &aclOutputMulInfo,
                                                                       1.0f,
                                                                       arm_compute::ConvertPolicy::WRAP,
                                                                       arm_compute::RoundingPolicy::TO_ZERO,
                                                                       arm_compute::ActivationLayerInfo());
  
     /// Validate ReduceSum
     // Flattened indices with shape { W }
     armnn::TensorInfo flattenedIndices_Info = indicesInfo;
     flattenedIndices_Info.SetShape({ keyIndices["W"] });
     const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
  
     const std::vector<unsigned int> armnnReduceAxes(1, 1);
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
                                                                           indices_W_ND_Info.GetNumDimensions(),
                                                                           armnnReduceAxes);
  
     auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
                                                                        &aclFlattenedIndicesInfo,
                                                                        static_cast<unsigned int>(coords[0]),
                                                                        arm_compute::ReductionOperation::SUM,
                                                                        false);
  
     /// Validate Gather
     // Params with shape { K, C }
     armnn::TensorInfo params_K_C_Info = paramsInfo;
     params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
     const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
  
     // Output of gather with shape { W, C }
     armnn::TensorInfo outputGather_Info = outputInfo;
     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
     const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
  
     auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
     auto statusGather =
             arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
  
     /// Validate Reshape
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
  
     auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
  
     /// Return OK if all the layers are valid
     auto okCode = arm_compute::ErrorCode::OK;
     if (statusMul.error_code()       == okCode &&
         statusReduceSum.error_code() == okCode &&
         statusGather.error_code()    == okCode &&
         statusReshape.error_code()   == okCode)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All GatherND layers validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "GatherND layer validate status failed.");
     }
 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by NeonLayerSupport::IsGatherNdSupported().

◆ NeonGatherWorkloadValidate()

arm_compute::Status NeonGatherWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	indices,
		const TensorInfo &	output,
		const GatherDescriptor &	descriptor
	)

Definition at line 13 of file NeonGatherWorkload.cpp.

 {
     // Checks whether the Neon Gather function accepts this input/indices/output combination.
     // Build the Arm Compute Library (ACL) tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput   = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
     const arm_compute::TensorInfo aclOutput  = BuildArmComputeTensorInfo(output);
  
     // Derive the axis value handed to ACL from descriptor.m_Axis and the input tensor info.
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
  
     // The status produced by NEGather::validate is returned unchanged.
     return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
 }

Referenced by NeonLayerSupport::IsGatherSupported().

◆ NeonInstanceNormalizationWorkloadValidate()

arm_compute::Status NeonInstanceNormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const InstanceNormalizationDescriptor &	descriptor
	)

Definition at line 19 of file NeonInstanceNormalizationWorkload.cpp.

 {
     // Checks whether the Neon instance normalization function accepts this configuration.
     // Both ACL tensor infos are built with the data layout stored in the descriptor.
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // Gamma, beta and epsilon are forwarded from the descriptor as-is; the ACL status is returned unchanged.
     return arm_compute::NEInstanceNormalizationLayer::validate(&aclInputInfo,
                                                                &aclOutputInfo,
                                                                descriptor.m_Gamma,
                                                                descriptor.m_Beta,
                                                                descriptor.m_Eps);
 }

Referenced by NeonLayerSupport::IsInstanceNormalizationSupported().

◆ NeonL2NormalizationWorkloadValidate()

arm_compute::Status NeonL2NormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const L2NormalizationDescriptor &	descriptor
	)

Definition at line 19 of file NeonL2NormalizationFloatWorkload.cpp.

 {
     // Checks whether the Neon L2 normalization function accepts this configuration.
     // Both ACL tensor infos are built with the data layout stored in the descriptor.
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // Axis passed to ACL: 2 when the layout is NCHW, 0 for every other layout.
     // NOTE(review): presumably this picks the channel dimension in ACL's dimension ordering - confirm.
     int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
  
     // Epsilon is forwarded from the descriptor; the ACL status is returned unchanged.
     return arm_compute::NEL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
 }

Referenced by NeonLayerSupport::IsL2NormalizationSupported().

◆ NeonLogicalAndWorkloadValidate()

arm_compute::Status NeonLogicalAndWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 18 of file NeonLogicalAndWorkload.cpp.

 {
     // Checks whether the Neon logical AND function accepts these two inputs and this output.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // The status produced by NELogicalAnd::validate is returned unchanged.
     const arm_compute::Status aclStatus = arm_compute::NELogicalAnd::validate(&aclInputInfo0,
                                                                               &aclInputInfo1,
                                                                               &aclOutputInfo);
     return aclStatus;
 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogicalNotWorkloadValidate()

arm_compute::Status NeonLogicalNotWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 19 of file NeonLogicalNotWorkload.cpp.

 {
     // Checks whether the Neon logical NOT function accepts this input/output pair.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // The status produced by NELogicalNot::validate is returned unchanged.
     const arm_compute::Status aclStatus = arm_compute::NELogicalNot::validate(&aclInputInfo,
                                                                               &aclOutputInfo);
     return aclStatus;
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLogicalOrWorkloadValidate()

arm_compute::Status NeonLogicalOrWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 18 of file NeonLogicalOrWorkload.cpp.

 {
     // Checks whether the Neon logical OR function accepts these two inputs and this output.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // The status produced by NELogicalOr::validate is returned unchanged.
     const arm_compute::Status aclStatus = arm_compute::NELogicalOr::validate(&aclInputInfo0,
                                                                              &aclInputInfo1,
                                                                              &aclOutputInfo);
     return aclStatus;
 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogSoftmaxWorkloadValidate()

arm_compute::Status NeonLogSoftmaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const LogSoftmaxDescriptor &	descriptor
	)

Definition at line 19 of file NeonLogSoftmaxWorkload.cpp.

 {
     // Checks whether the Neon log-softmax function accepts this configuration.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // Derive the axis value handed to ACL from descriptor.m_Axis and the input tensor info.
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     // Beta is forwarded from the descriptor; the ACL status is returned unchanged.
     return arm_compute::NELogSoftmaxLayer::validate(&aclInputInfo,
                                                     &aclOutputInfo,
                                                     descriptor.m_Beta,
                                                     aclAxis);
 }

Referenced by NeonLayerSupport::IsLogSoftmaxSupported().

◆ NeonLogWorkloadValidate()

arm_compute::Status NeonLogWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonLogWorkload.cpp.

 {
     // Checks whether the Neon element-wise log function accepts this input/output pair.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // The status produced by NELogLayer::validate is returned unchanged.
     return arm_compute::NELogLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLstmFloatWorkloadValidate()

arm_compute::Status NeonLstmFloatWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	scratchBuffer,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const LstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 253 of file NeonLstmFloatWorkload.cpp.

 {
     // Checks whether the Neon LSTM function accepts this configuration. Mandatory tensors are always
     // converted; optional ones (CIFG, projection, peephole, layer normalisation) are converted and
     // registered in lstm_params_info only when the matching descriptor flag enables them.
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
  
     // The inputs and outputs
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     // Optional tensor infos. They are declared at function scope because lstm_params_info stores
     // their addresses, and they are only assigned below when the relevant option is enabled.
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclInputGateBiasInfo;
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
  
     // Input-gate parameters are registered only when CIFG is disabled.
     if (!descriptor.m_CifgEnabled)
     {
         // The cell-to-input weights are read only when peephole is enabled as well.
         if (descriptor.m_PeepholeEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         // nullptr is passed for the cell-to-input weights when peephole is disabled.
         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
                                          descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
                                          &aclInputGateBiasInfo);
     }
  
     // Projection: the weights are always read when enabled; the bias is optional and passed as
     // nullptr when paramsInfo.m_ProjectionBias is not set.
     if (descriptor.m_ProjectionEnabled)
     {
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                                paramsInfo.m_ProjectionBias != nullptr ?
                                                &aclProjectionBiasInfo : nullptr);
     }
  
     // Peephole: cell-to-forget and cell-to-output weights (cell-to-input is handled in the CIFG block above).
     if (descriptor.m_PeepholeEnabled)
     {
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
     }
  
     // Layer normalisation: the input layer-norm weights are read and passed only when CIFG is
     // disabled; with CIFG enabled nullptr is passed in their place.
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
                                                         nullptr : &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
     }
  
     // Clipping thresholds are taken from the descriptor unchanged.
     float cell_threshold = descriptor.m_ClippingThresCell;
     float projection_threshold = descriptor.m_ClippingThresProj;
  
     // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
     arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
  
     // The status produced by NELSTMLayer::validate is returned unchanged.
     return arm_compute::NELSTMLayer::validate(&aclInputInfo,
                                               &aclInputToForgetWeightsInfo,
                                               &aclInputToCellWeightsInfo,
                                               &aclInputToOutputWeightsInfo,
                                               &aclRecurrentToForgetWeightsInfo,
                                               &aclRecurrentToCellWeightsInfo,
                                               &aclRecurrentToOutputWeightsInfo,
                                               &aclForgetGateBiasInfo,
                                               &aclCellBiasInfo,
                                               &aclOutputGateBiasInfo,
                                               &aclOutputStateInInfo,
                                               &aclCellStateInInfo,
                                               &aclScratchBufferInfo,
                                               &aclOutputStateOutInfo,
                                               &aclCellStateOutInfo,
                                               &aclOutputInfo,
                                               lstm_params_info,
                                               activationLayerInfo,
                                               cell_threshold,
                                               projection_threshold);
 }

Referenced by NeonLayerSupport::IsLstmSupported().

◆ NeonMaximumWorkloadValidate()

arm_compute::Status NeonMaximumWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Definition at line 14 of file NeonMaximumWorkload.cpp.

 {
     // Checks whether the Neon element-wise maximum function accepts these two inputs and this output.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // The status produced by NEElementwiseMax::validate is returned unchanged.
     return arm_compute::NEElementwiseMax::validate(&aclInput0,
                                                    &aclInput1,
                                                    &aclOutput);
 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMaximumSupported().

◆ NeonMeanWorkloadValidate()

arm_compute::Status NeonMeanWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const MeanDescriptor &	descriptor
	)

Definition at line 18 of file NeonMeanWorkload.cpp.

 {
     // Checks whether the Neon reduce-mean function accepts this configuration.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // Reduction coordinates are built from the ACL dimension count, the ArmNN dimension count
     // and the axes listed in the descriptor.
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           descriptor.m_Axis);
  
     // m_KeepDims is forwarded from the descriptor; the ACL status is returned unchanged.
     return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
 }

Referenced by NeonLayerSupport::IsMeanSupported().

◆ NeonMinimumWorkloadValidate()

arm_compute::Status NeonMinimumWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output
	)

Validates the two input tensor infos and the output tensor info for the Neon minimum workload.

Parameters

[in]	input0	Tensor info of the first input to be validated.
[in]	input1	Tensor info of the second input to be validated.
[in]	output	Tensor info of the output to be validated.

Definition at line 15 of file NeonMinimumWorkload.cpp.

 {
     // Checks whether the Neon element-wise minimum function accepts these two inputs and this output.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // The status produced by NEElementwiseMin::validate is returned unchanged.
     return arm_compute::NEElementwiseMin::validate(&aclInput0,
                                                    &aclInput1,
                                                    &aclOutput);
 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMinimumSupported().

◆ NeonMultiplicationWorkloadValidate()

arm_compute::Status NeonMultiplicationWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 19 of file NeonMultiplicationWorkload.cpp.

 {
     // Checks whether the Neon pixel-wise multiplication function accepts this configuration.
     // Note the naming offset: aclInput1 is built from input0 and aclInput2 from input1.
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // SATURATE is used as soon as either input has a quantized data type; otherwise WRAP.
     auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
  
     // The activation descriptor pointer is handed to the converter as received.
     // NOTE(review): presumably the converter tolerates a null pointer (no fused activation) - confirm.
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
     return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
                                                             &aclInput2,
                                                             &aclOutput,
                                                             1.0f,
                                                             convertPolicy,
                                                             arm_compute::RoundingPolicy::TO_ZERO,
                                                             activationInfo);
 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsMultiplicationSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonNegWorkloadValidate()

arm_compute::Status NeonNegWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonNegWorkload.cpp.

 {
     // Checks whether the Neon element-wise negation function accepts this input/output pair.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // The status produced by NENegLayer::validate is returned unchanged.
     return arm_compute::NENegLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonNormalizationWorkloadValidate()

arm_compute::Status NeonNormalizationWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const NormalizationDescriptor &	descriptor
	)

Definition at line 49 of file NeonNormalizationFloatWorkload.cpp.

 {
     // Checks whether the Neon normalization function accepts this configuration.
     // Both ACL tensor infos are built with the data layout stored in the descriptor.
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // Convert the ArmNN normalization descriptor into ACL's NormalizationLayerInfo.
     arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
  
     // The status produced by NENormalizationLayer::validate is returned unchanged.
     return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
 }

Referenced by NeonLayerSupport::IsNormalizationSupported().

◆ NeonPadWorkloadValidate()

arm_compute::Status NeonPadWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const PadDescriptor &	descriptor
	)

Definition at line 59 of file NeonPadWorkload.cpp.

 {
     // Checks whether the Neon pad function accepts this configuration.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     // ACL is given the descriptor's padding pairs in reverse order.
     // NOTE(review): presumably because ACL orders dimensions opposite to ArmNN - confirm.
     std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
  
     std::reverse_copy(std::begin(descriptor.m_PadList),
                       std::end(descriptor.m_PadList),
                       std::begin(reversed_PadList));
  
     arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
  
     // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
     arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
     // The status produced by NEPadLayer::validate is returned unchanged.
     return arm_compute::NEPadLayer::validate(&aclInputInfo,
                                              &aclOutputInfo,
                                              padList,
                                              pixelValue,
                                              ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
 }

Referenced by NeonLayerSupport::IsPadSupported().

◆ NeonPermuteWorkloadValidate()

arm_compute::Status NeonPermuteWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const PermuteDescriptor &	descriptor
	)

Definition at line 15 of file NeonPermuteWorkload.cpp.

 {
     // Checks whether the Neon permute function accepts this configuration.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
     // Dimension mappings requested by the descriptor; bound by reference, not copied.
     const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
  
     // The mappings are converted to ACL's permutation vector; the ACL status is returned unchanged.
     return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
                                       armcomputetensorutils::BuildArmComputePermutationVector(mappings));
 }

Referenced by NeonLayerSupport::IsPermuteSupported().

◆ NeonPooling2dWorkloadValidate()

arm_compute::Status NeonPooling2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Pooling2dDescriptor &	descriptor
	)

Definition at line 22 of file NeonPooling2dWorkload.cpp.

 {
     // Checks whether the Neon 2D pooling function accepts this configuration.
     // Both ACL tensor infos are built with the data layout stored in the descriptor.
     const arm_compute::TensorInfo aclInputInfo =
             BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo =
             BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     // Convert the ArmNN pooling descriptor into ACL's PoolingLayerInfo.
     arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
  
     // The status produced by NEPoolingLayer::validate is returned unchanged.
     return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
 }

Referenced by NeonLayerSupport::IsPooling2dSupported().

◆ NeonPooling3dWorkloadValidate()

arm_compute::Status NeonPooling3dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const Pooling3dDescriptor &	descriptor
	)

Definition at line 15 of file NeonPooling3dWorkload.cpp.

     {
         // Checks whether the Neon 3D pooling function accepts this configuration.
         // Both ACL tensor infos are built with the data layout stored in the descriptor.
         const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
         const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
         // Convert the ArmNN pooling descriptor into ACL's Pooling3dLayerInfo.
         arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
         // The status produced by NEPooling3dLayer::validate is returned unchanged.
         return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
     }

Referenced by NeonLayerSupport::IsPooling3dSupported().

◆ NeonPreluWorkloadValidate()

arm_compute::Status NeonPreluWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	alpha,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonPreluWorkload.cpp.

 {
     // Checks whether the Neon PReLU function accepts this input/alpha/output combination.
     // Build the ACL tensor infos from the ArmNN tensor infos.
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     // The status produced by NEPReluLayer::validate is returned unchanged.
     return arm_compute::NEPReluLayer::validate(&aclInput,
                                                &aclAlpha,
                                                &aclOutput);
 }

Referenced by NeonLayerSupport::IsPreluSupported().

◆ NeonQLstmWorkloadValidate()

arm_compute::Status NeonQLstmWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	output,
		const QLstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 243 of file NeonQLstmWorkload.cpp.

 {
     // Checks whether the Neon QLSTM function accepts this configuration. Mandatory tensors are always
     // converted; optional ones (peephole, projection, layer normalisation, CIFG) are converted and
     // registered in aclParamsInfo only when enabled, followed by the scalar descriptor settings.
     arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
  
     // Input/Output tensor info
     const arm_compute::TensorInfo aclInputInfo         = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo   = BuildArmComputeTensorInfo(cellStateIn);
  
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo   = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputInfo         = BuildArmComputeTensorInfo(output);
  
     // Mandatory tensor info
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
             = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     // Optional tensor info
     // Declared at function scope because aclParamsInfo stores their addresses; each one is only
     // assigned below when the relevant option is enabled.
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
  
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputGateBiasInfo;
  
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
     // Create tensor info for optional params if they are enabled
     if (descriptor.m_PeepholeEnabled)
     {
         // The cell-to-input weights are read only when CIFG is disabled as well.
         if (!descriptor.m_CifgEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
  
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         // Set peephole params info
         aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
                                           &aclCellToOutputWeightsInfo);
     }
  
     if (descriptor.m_ProjectionEnabled)
     {
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         // The projection bias is optional; it is read only when the pointer is set.
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
  
         // Set projection params info
         aclParamsInfo.set_projection_params(
             &aclProjectionWeightsInfo,
             paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
     }
  
     if (descriptor.m_LayerNormEnabled)
     {
         // The input layer-norm weights are read only when CIFG is disabled.
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
  
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo   = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         // Set layer norm params info
         // NOTE(review): the first argument is gated on paramsInfo.m_InputLayerNormWeights, while the
         // info above is only filled in when CIFG is disabled. If the pointer is set with CIFG enabled,
         // a default-constructed info would be passed - confirm callers never supply that combination.
         aclParamsInfo.set_layer_normalization_params(
             paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
             &aclForgetLayerNormWeightsInfo,
             &aclCellLayerNormWeightsInfo,
             &aclOutputLayerNormWeightsInfo);
     }
  
     // Input-gate parameters are registered only when CIFG is disabled.
     if (!descriptor.m_CifgEnabled)
     {
         aclInputToInputWeightsInfo     = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo           = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         // Set CIFG params info
         // NOTE(review): the cell-to-input argument is gated on paramsInfo.m_CellToInputWeights, while
         // aclCellToInputWeightsInfo is only filled in when peephole is enabled. If the pointer is set
         // with peephole disabled, a default-constructed info would be passed - confirm with callers.
         aclParamsInfo.set_cifg_params(
             &aclInputToInputWeightsInfo,
             &aclRecurrentToInputWeightsInfo,
             paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
             &aclInputGateBiasInfo);
     }
  
     // Set scalar descriptor params
     aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
     aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
     aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
     aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
                                           descriptor.m_ForgetIntermediateScale,
                                           descriptor.m_CellIntermediateScale,
                                           descriptor.m_OutputIntermediateScale);
  
     // QLSTM NEON validate
     // Cell state is passed before output state here, for both the "in" and the "out" pair.
     return arm_compute::NEQLSTMLayer::validate(&aclInputInfo,
                                                &aclInputToForgetWeightsInfo,
                                                &aclInputToCellWeightsInfo,
                                                &aclInputToOutputWeightsInfo,
                                                &aclRecurrentToForgetWeightsInfo,
                                                &aclRecurrentToCellWeightsInfo,
                                                &aclRecurrentToOutputWeightsInfo,
                                                &aclForgetGateBiasInfo,
                                                &aclCellBiasInfo,
                                                &aclOutputGateBiasInfo,
                                                &aclCellStateInInfo,
                                                &aclOutputStateInInfo,
                                                &aclCellStateOutInfo,
                                                &aclOutputStateOutInfo,
                                                &aclOutputInfo,
                                                aclParamsInfo);
 }

Referenced by NeonLayerSupport::IsQLstmSupported().

◆ NeonQuantizedLstmWorkloadValidate()

arm_compute::Status NeonQuantizedLstmWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	outputStateOut,
		const QuantizedLstmInputParamsInfo &	paramsInfo
	)

Definition at line 131 of file NeonQuantizedLstmWorkload.cpp.

 {
     // The inputs and outputs
     const arm_compute::TensorInfo aclInputInfo          = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclCellStateInInfo    = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclOutputStateInInfo  = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateOutInfo   = BuildArmComputeTensorInfo(cellStateOut);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToInputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
  
     const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
  
     const arm_compute::TensorInfo aclInputGateBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
                                   = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     return arm_compute::NELSTMLayerQuantized::validate(&aclInputInfo,
                                                        &aclInputToInputWeightsInfo,
                                                        &aclInputToForgetWeightsInfo,
                                                        &aclInputToCellWeightsInfo,
                                                        &aclInputToOutputWeightsInfo,
                                                        &aclRecurrentToInputWeightsInfo,
                                                        &aclRecurrentToForgetWeightsInfo,
                                                        &aclRecurrentToCellWeightsInfo,
                                                        &aclRecurrentToOutputWeightsInfo,
                                                        &aclInputGateBiasInfo,
                                                        &aclForgetGateBiasInfo,
                                                        &aclCellBiasInfo,
                                                        &aclOutputGateBiasInfo,
                                                        &aclCellStateInInfo,
                                                        &aclOutputStateInInfo,
                                                        &aclCellStateOutInfo,
                                                        &aclOutputStateOutInfo);
 }

Referenced by NeonLayerSupport::IsQuantizedLstmSupported().

◆ NeonQuantizeWorkloadValidate()

arm_compute::Status NeonQuantizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file NeonQuantizeWorkload.cpp.

 {
     const arm_compute::TensorInfo neonInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEQuantizationLayer::validate(&neonInputInfo, &neonOutputInfo);
 }

Referenced by NeonLayerSupport::IsQuantizeSupported().

◆ NeonReduceWorkloadValidate()

arm_compute::Status NeonReduceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ReduceDescriptor &	descriptor
	)

Definition at line 19 of file NeonReduceWorkload.cpp.

 {
     if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
     {
         const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
         const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
         arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                               input.GetNumDimensions(),
                                                                               descriptor.m_vAxis);
  
         return arm_compute::NEReductionOperation::validate(&aclInputInfo,
                                                            &aclOutputInfo,
                                                            static_cast<unsigned int>(coords[0]),
                                                            ConvertReductionOperationToAcl(descriptor),
                                                            descriptor.m_KeepDims);
     }
     else
     {
         // Validate layer if there are multiple axes.
         arm_compute::Status status;
         IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
         return status;
     }
 }

References ReduceDescriptor::m_vAxis.

Referenced by NeonLayerSupport::IsReduceSupported().

◆ NeonReshapeWorkloadValidate()

arm_compute::Status NeonReshapeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonReshapeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
 }

Referenced by NeonLayerSupport::IsReshapeSupported().

◆ NeonResizeWorkloadValidate()

arm_compute::Status NeonResizeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const ResizeDescriptor &	descriptor
	)

Definition at line 22 of file NeonResizeWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
  
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
     aclInputInfo.set_data_layout(aclDataLayout);
     aclOutputInfo.set_data_layout(aclDataLayout);
  
     arm_compute::InterpolationPolicy aclInterpolationPolicy =
             ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
  
     arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
                                                                                  arm_compute::SamplingPolicy::TOP_LEFT;
  
     bool usePadding = false;
  
     return arm_compute::NEScale::validate(&aclInputInfo,
                                           &aclOutputInfo,
                                           arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
                                                                        arm_compute::BorderMode::REPLICATE,
                                                                        arm_compute::PixelValue(0.f),
                                                                        samplingPolicy,
                                                                        usePadding,
                                                                        descriptor.m_AlignCorners));
  
 }

Referenced by NeonLayerSupport::IsResizeSupported().

◆ NeonReverseV2WorkloadValidate()

arm_compute::Status NeonReverseV2WorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	axis,
		const TensorInfo &	output
	)

Definition at line 14 of file NeonReverseV2Workload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     return arm_compute::NEReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
 }

Referenced by NeonLayerSupport::IsReverseV2Supported().

◆ NeonRsqrtWorkloadValidate()

arm_compute::Status NeonRsqrtWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file NeonRsqrtWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NERsqrtLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSinWorkloadValidate()

arm_compute::Status NeonSinWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 17 of file NeonSinWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     return arm_compute::NESinLayer::validate(&aclInput, &aclOutput);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSliceWorkloadValidate()

arm_compute::Status NeonSliceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SliceDescriptor &	descriptor
	)

Definition at line 21 of file NeonSliceWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
  
     std::tie(starts, ends) = SetNeonSliceData(descriptor.m_Begin, descriptor.m_Size);
  
     return arm_compute::NESlice::validate(&aclInputInfo, &aclOutputInfo, starts, ends);
 }

Referenced by NeonLayerSupport::IsSliceSupported().

◆ NeonSoftmaxWorkloadValidate()

arm_compute::Status NeonSoftmaxWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SoftmaxDescriptor &	descriptor
	)

Definition at line 19 of file NeonSoftmaxWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
     return arm_compute::NESoftmaxLayer::validate(&aclInputInfo,
                                                  &aclOutputInfo,
                                                  descriptor.m_Beta,
                                                  aclAxis);
 }

Referenced by NeonLayerSupport::IsSoftmaxSupported().

◆ NeonSpaceToBatchNdWorkloadValidate()

arm_compute::Status NeonSpaceToBatchNdWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SpaceToBatchNdDescriptor &	descriptor
	)

Definition at line 15 of file NeonSpaceToBatchNdWorkload.cpp.

 {
     arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
  
     arm_compute::Status statusSpaceToBatch  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeInput  = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
  
     arm_compute::TensorInfo aclReshapeInputInfo  = aclInputInfo;
     arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
  
     // When a spacial dimension is missing (rank=3) set W to 1
     const unsigned int rank = input.GetNumDimensions();
     if (rank == 3)
     {
         const arm_compute::TensorShape inputShape  = aclInputInfo.tensor_shape();
         const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
  
         if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
         {
             // In ACL dimensions are right to left: C, W, H, N
             aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
             aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
         }
         else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
         {
             // In ACL dimensions are right to left: W, H, C, N
             aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
             aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
         }
         else
         {
             throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
         }
  
         statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
         statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
     }
  
     // ArmNN blockShape is [H, W] ACl asks for W, H
     int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
     int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
  
     unsigned int padLeft  = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
     unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
     arm_compute::Size2D paddingLeftTop      = BuildArmComputeSize2D(padLeft,
                                                                     descriptor.m_PadList[0].first);
     arm_compute::Size2D paddingRightBottom  = BuildArmComputeSize2D(padRight,
                                                                     descriptor.m_PadList[0].second);
  
     statusSpaceToBatch = arm_compute::NESpaceToBatchLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
                                                                     blockWidth,
                                                                     blockHeight,
                                                                     paddingLeftTop,
                                                                     paddingRightBottom,
                                                                     rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo);
  
     if (statusReshapeInput.error_code()  == arm_compute::ErrorCode::OK &&
         statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
         statusSpaceToBatch.error_code()  == arm_compute::ErrorCode::OK)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All SpaceToBatch layers validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "SpaceToBatch layer validate status failed."
                                    + statusSpaceToBatch.error_description()
                                    + statusReshapeInput.error_description()
                                    + statusReshapeOutput.error_description());
     }
 }

Referenced by NeonLayerSupport::IsSpaceToBatchNdSupported().

◆ NeonSpaceToDepthWorkloadValidate()

arm_compute::Status NeonSpaceToDepthWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const SpaceToDepthDescriptor &	descriptor
	)

Definition at line 19 of file NeonSpaceToDepthWorkload.cpp.

 {
     DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
  
     int32_t blockSize  = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
  
     return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsSpaceToDepthSupported().

◆ NeonSplitterWorkloadValidate()

arm_compute::Status NeonSplitterWorkloadValidate	(	const TensorInfo &	input,
		const std::vector< std::reference_wrapper< TensorInfo >> &	outputs,
		unsigned int	splitAxis
	)

Definition at line 32 of file NeonSplitterWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
  
     size_t numOutputs = outputs.size();
  
     std::vector<arm_compute::TensorInfo> aclOutputs;
     aclOutputs.reserve(numOutputs);
  
     std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
     aclOutputPtr.reserve(numOutputs);
  
     for (size_t i = 0u; i < outputs.size(); ++i)
     {
         aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
         aclOutputPtr.emplace_back(&aclOutputs.back());
     }
  
     unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
     return arm_compute::NESplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
 }

Referenced by NeonLayerSupport::IsSplitterSupported().

◆ NeonSqrtWorkloadValidate()

arm_compute::Status NeonSqrtWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output
	)

Definition at line 18 of file NeonSqrtWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     ActivationDescriptor descriptor;
     descriptor.m_Function = ActivationFunction::Sqrt;
     const arm_compute::ActivationLayerInfo activationLayerInfo =
             ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
  
     return arm_compute::NEActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonStackWorkloadValidate()

arm_compute::Status NeonStackWorkloadValidate	(	const std::vector< const TensorInfo * > &	inputs,
		const TensorInfo &	output,
		const StackDescriptor &	descriptor
	)

Definition at line 27 of file NeonStackWorkload.cpp.

 {
     std::vector<arm_compute::TensorInfo> aclInputs;
     for (const TensorInfo* input : inputs)
     {
         arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
         aclInputs.emplace_back(aclInputInfo);
     }
  
     std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
     for (arm_compute::ITensorInfo& input : aclInputs)
     {
         aclInputPtrs.emplace_back(&input);
     }
  
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
     int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
     return arm_compute::NEStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
 }

Referenced by NeonLayerSupport::IsStackSupported().

◆ NeonStridedSliceWorkloadValidate()

arm_compute::Status NeonStridedSliceWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const StridedSliceDescriptor &	descriptor
	)

Definition at line 19 of file NeonStridedSliceWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input,
                                                                                               descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output,
                                                                                                descriptor.m_DataLayout);
  
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
     arm_compute::Coordinates strides;
  
     std::tie(starts, ends, strides) = SetNeonStridedSliceData(descriptor.m_Begin,
                                                               descriptor.m_End,
                                                               descriptor.m_Stride);
  
     auto numDimensions       = armnn::numeric_cast<int>(input.GetNumDimensions());
     int32_t begin_mask       = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
     int32_t end_mask         = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
     int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
  
     return arm_compute::NEStridedSlice::validate(&aclInput,
                                                  &aclOutput,
                                                  starts,
                                                  ends,
                                                  strides,
                                                  begin_mask,
                                                  end_mask,
                                                  shrink_axis_mask);
 }

Referenced by NeonLayerSupport::IsStridedSliceSupported().

◆ NeonSubtractionWorkloadValidate()

arm_compute::Status NeonSubtractionWorkloadValidate	(	const TensorInfo &	input0,
		const TensorInfo &	input1,
		const TensorInfo &	output,
		const ActivationDescriptor *	activationDescriptor
	)

Definition at line 22 of file NeonSubtractionWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
  
     return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
                                                           &aclInput1,
                                                           &aclOutput,
                                                           arm_compute::ConvertPolicy::SATURATE,
                                                           activationInfo);
 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsSubtractionSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonTensorHandleFactoryId()

constexpr const char* armnn::NeonTensorHandleFactoryId ( )

constexpr

Definition at line 14 of file NeonTensorHandleFactory.hpp.

// Returns the fixed identifier string "Arm/Neon/TensorHandleFactory".
14 { return "Arm/Neon/TensorHandleFactory"; }

Referenced by NeonTensorHandleFactory::GetIdStatic().

◆ NeonTileWorkloadValidate()

arm_compute::Status NeonTileWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TileDescriptor &	descriptor
	)

Definition at line 14 of file NeonTileWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
  
     std::vector<unsigned int> aclMultiples = descriptor.m_Multiples;
     std::reverse(aclMultiples.begin(),aclMultiples.end());
  
     return arm_compute::NETile::validate(&aclInput, &aclOutput, aclMultiples);
 }

Referenced by NeonLayerSupport::IsTileSupported().

◆ NeonTransposeConvolution2dWorkloadValidate()

arm_compute::Status NeonTransposeConvolution2dWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TransposeConvolution2dDescriptor &	descriptor,
		const TensorInfo &	weights,
		const Optional< TensorInfo > &	biases
	)

Definition at line 25 of file NeonTransposeConvolution2dWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo  = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
  
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
  
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
  
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
  
     arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
  
     return arm_compute::NEDeconvolutionLayer::validate(&aclInputInfo,
                                                        &aclWeightsInfo,
                                                        optionalAclBiasesInfo,
                                                        &aclOutputInfo,
                                                        layerInfo);
 }

Referenced by NeonLayerSupport::IsTransposeConvolution2dSupported().

◆ NeonTransposeWorkloadValidate()

arm_compute::Status NeonTransposeWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	output,
		const TransposeDescriptor &	descriptor
	)

Definition at line 15 of file NeonTransposeWorkload.cpp.

 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
     const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
  
     return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
                                             armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
 }

Referenced by NeonLayerSupport::IsTransposeSupported().

◆ NeonUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const UnidirectionalSequenceLstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 510 of file NeonUnidirectionalSequenceLstmFloatWorkload.cpp.

 {
     TensorShape inputLayerShape = input.GetShape();
     TensorShape outputLayerShape = output.GetShape();
  
     if (inputLayerShape.GetNumDimensions() != 3)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
  
     unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
     unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
     unsigned int inputSize = inputLayerShape[2];
     unsigned int outputSize = outputLayerShape[2];
  
     const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
     const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
  
     arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute1 status");
     arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                           "Split status");
     arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                          "LSTM status");
     arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                            "Concat status");
     arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute2 status");
  
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     //
     // Permute validate
     //
     TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
     arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
     if (!descriptor.m_TimeMajor)
     {
         statusPermute1 =  arm_compute::NEPermute::validate(&aclInputInfo,
                                                            &aclPermuteOutInfo,
                                                            arm_compute::PermutationVector(0U, 2U, 1U));
     }
  
     //
     // Split and Concat Tensors validate
     //
     std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
     std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
     std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
     std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
     splitterOutputsTensorInfos.reserve(maxTime);
     concatInputsTensorInfos.reserve(maxTime);
     for (unsigned int i = 0; i < maxTime; ++i)
     {
         arm_compute::TensorInfo splitter_out;
         arm_compute::TensorInfo concat_in;
  
         auto splitterTensorInfo = TensorInfo(input);
         auto concatTensorInfo   = TensorInfo(output);
         splitterTensorInfo.SetShape({batchSize, inputSize});
         concatTensorInfo.SetShape({batchSize, outputSize});
  
         arm_compute::TensorInfo aclSplitterTensorInfo
             = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
         arm_compute::TensorInfo aclConcatTensorInfo
             = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
  
         splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
         concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
         splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
         concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
     }
  
     //
     // Split validate
     //
     unsigned int numberDimensions = 3;
     unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
     unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
  
     if (maxTime != 1) // ACL split does not work with only one element to split.
     {
         if (!descriptor.m_TimeMajor)
         {
             statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
                                                          splitterOutputsTensorInfosPtr,
                                                          aclAxisSplit);
         }
         else
         {
             statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
         }
     }
  
     //
     // LSTM validate
     //
  
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
  
     unsigned int numUnits = cellStateIn.GetShape()[1];
     unsigned int scratchBufferFactor = 4;
  
     if (descriptor.m_CifgEnabled)
     {
         // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
        scratchBufferFactor = 3;
     }
  
     const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
  
     // The inputs and outputs
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclInputGateBiasInfo;
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
  
     if (!descriptor.m_CifgEnabled)
     {
         if (descriptor.m_PeepholeEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
                                          &aclRecurrentToInputWeightsInfo,
                                          descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
                                          &aclInputGateBiasInfo);
     }
  
     if (descriptor.m_ProjectionEnabled)
     {
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                                paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
     }
  
     if (descriptor.m_PeepholeEnabled)
     {
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
     }
  
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
                                                         &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
     }
  
     // Need to be set at negative threshold to be compatible for ACL
     float cell_threshold = descriptor.m_ClippingThresCell;
     float projection_threshold = descriptor.m_ClippingThresProj;
  
     arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
  
     for (unsigned int i = 0; i != maxTime; ++i)
     {
  
         // Set LSTM input and output ITensors depending on:
         // input format (timeMajor) & number of LSTM batches (maxTime).
         arm_compute::ITensorInfo* outputLSTM;
         arm_compute::ITensorInfo* inputLSTM;
  
         // If there is only one LSTM time major batch, we will not concat OR permute.
         // Set input of LSTM to be first input ITensor.
         // Set output of LSTM to be final output ITensor.
         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
         if (maxTime == 1 && descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
             TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
  
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
  
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
  
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
  
             const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
             outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
         }
         // If there is only one LSTM batch major batch, we will not concat, only permute.
         // Set input of LSTM to be output of initial permute.
         // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
         // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
         else if (maxTime == 1 && !descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = &aclPermuteOutInfo;
  
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
         // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
         else
         {
             inputLSTM = splitterOutputsTensorInfosPtr[i];
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
  
         statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,
                                                         &aclInputToForgetWeightsInfo,
                                                         &aclInputToCellWeightsInfo,
                                                         &aclInputToOutputWeightsInfo,
                                                         &aclRecurrentToForgetWeightsInfo,
                                                         &aclRecurrentToCellWeightsInfo,
                                                         &aclRecurrentToOutputWeightsInfo,
                                                         &aclForgetGateBiasInfo,
                                                         &aclCellBiasInfo,
                                                         &aclOutputGateBiasInfo,
                                                         &aclOutputStateInInfo,
                                                         &aclCellStateInInfo,
                                                         &aclScratchBufferInfo,
                                                         &aclOutputStateOutInfo,
                                                         &aclCellStateOutInfo,
                                                         outputLSTM,
                                                         lstm_params_info,
                                                         activationLayerInfo,
                                                         cell_threshold,
                                                         projection_threshold);
  
         if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
         {
             break;
         }
     }
  
     //
     // Concat validate
     //
  
     // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
     TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
  
     TensorInfo concatOutputTensorInfo = TensorInfo(output);
     concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
     arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
  
     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
     {
         for (unsigned int i = 0; i < maxTime; ++i)
         {
             auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
             concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
         }
  
         unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
         if (!descriptor.m_TimeMajor)
         {
             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclConcatOutputTensorInfo,
                                                                      aclAxisConcat);
         }
         else
         {
             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclOutputInfo,
                                                                      aclAxisConcat);
         }
     }
     // If only one LSTM batch, we do not concat and/or permute.
     // Must ensure final output info is expanded to correct batch major dimensions.
     else
     {
         if (!descriptor.m_TimeMajor)
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandBatchMajor));
         }
         else
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }
     }
  
     //
     // Permute validate
     //
     if (!descriptor.m_TimeMajor)
     {
         // Output now time major. Permute output back to batch major.
         if (maxTime != 1)
         {
             statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
         else
         {
             statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
     }
  
     auto okCode = arm_compute::ErrorCode::OK;
     if (statusPermute1.error_code() == okCode &&
         statusSplit.error_code()    == okCode &&
         statusLSTM .error_code()    == okCode &&
         statusConcat.error_code()   == okCode &&
         statusPermute2.error_code() == okCode)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All Unidirectional Sequence LSTM layer validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NeonUnidirectionalSequenceLstmWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate	(	const TensorInfo &	input,
		const TensorInfo &	outputStateIn,
		const TensorInfo &	cellStateIn,
		const TensorInfo &	outputStateOut,
		const TensorInfo &	cellStateOut,
		const TensorInfo &	output,
		const UnidirectionalSequenceLstmDescriptor &	descriptor,
		const LstmInputParamsInfo &	paramsInfo
	)

Definition at line 491 of file NeonUnidirectionalSequenceLstmWorkload.cpp.

 {
     TensorShape inputLayerShape = input.GetShape();
     TensorShape outputLayerShape = output.GetShape();
  
     if (inputLayerShape.GetNumDimensions() != 3)
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
  
     unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
     unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
     unsigned int inputSize = inputLayerShape[2];
     unsigned int outputSize = outputLayerShape[2];
  
     const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
     const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
  
     arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute1 status");
     arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                           "Split status");
     arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                          "LSTM status");
     arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                            "Concat status");
     arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
                                                              "Permute2 status");
  
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(output);
  
     //
     // Permute validate
     //
     TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
     arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
     if (!descriptor.m_TimeMajor)
     {
         statusPermute1 =  arm_compute::NEPermute::validate(&aclInputInfo,
                                                            &aclPermuteOutInfo,
                                                            arm_compute::PermutationVector(0U, 2U, 1U));
     }
  
     //
     // Split and Concat Tensors validate
     //
     std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
     std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
     std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
     std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
     splitterOutputsTensorInfos.reserve(maxTime);
     concatInputsTensorInfos.reserve(maxTime);
     for (unsigned int i = 0; i < maxTime; ++i)
     {
         arm_compute::TensorInfo splitter_out;
         arm_compute::TensorInfo concat_in;
  
         auto splitterTensorInfo = TensorInfo(input);
         auto concatTensorInfo   = TensorInfo(output);
         splitterTensorInfo.SetShape({batchSize, inputSize});
         concatTensorInfo.SetShape({batchSize, outputSize});
  
         arm_compute::TensorInfo aclSplitterTensorInfo
             = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
         arm_compute::TensorInfo aclConcatTensorInfo
             = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
  
         splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
         concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
         splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
         concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
     }
  
     //
     // Split validate
     //
     unsigned int numberDimensions = 3;
     unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
     unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
  
     if (maxTime != 1) // ACL split does not work with only one element to split.
     {
         if (!descriptor.m_TimeMajor)
         {
             statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
                                                          splitterOutputsTensorInfosPtr,
                                                          aclAxisSplit);
         } else
         {
             statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
         }
     }
  
     //
     // LSTM validate
     //
  
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
  
     unsigned int numUnits = cellStateIn.GetShape()[1];
     unsigned int scratchBufferFactor = 4;
  
     if (descriptor.m_CifgEnabled)
     {
         // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
        scratchBufferFactor = 3;
     }
  
     const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
  
  
     lstm_params_info.set_cell_clip_params(descriptor.m_ClippingThresCell);
     lstm_params_info.set_projection_clip_params(descriptor.m_ClippingThresProj);
     // The inputs and outputs
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
  
     // Basic parameters
     const arm_compute::TensorInfo aclInputToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
     const arm_compute::TensorInfo aclInputToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
     const arm_compute::TensorInfo aclInputToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
     const arm_compute::TensorInfo aclForgetGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
     const arm_compute::TensorInfo aclCellBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
     const arm_compute::TensorInfo aclOutputGateBiasInfo
                                       = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
  
     arm_compute::TensorInfo aclInputToInputWeightsInfo;
     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
     arm_compute::TensorInfo aclCellToInputWeightsInfo;
     arm_compute::TensorInfo aclInputGateBiasInfo;
     arm_compute::TensorInfo aclProjectionWeightsInfo;
     arm_compute::TensorInfo aclProjectionBiasInfo;
     arm_compute::TensorInfo aclCellToForgetWeightsInfo;
     arm_compute::TensorInfo aclCellToOutputWeightsInfo;
  
     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
  
     if (!descriptor.m_CifgEnabled)
     {
         if (descriptor.m_PeepholeEnabled)
         {
             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
         }
         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
  
         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
                                          &aclRecurrentToInputWeightsInfo,
                                          descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
                                          &aclInputGateBiasInfo);
     }
  
     if (descriptor.m_ProjectionEnabled)
     {
         if (paramsInfo.m_ProjectionBias != nullptr)
         {
             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
         }
         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
  
         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
                                                paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
     }
  
     if (descriptor.m_PeepholeEnabled)
     {
         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
  
         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
     }
  
     if (descriptor.m_LayerNormEnabled)
     {
         if (!descriptor.m_CifgEnabled)
         {
             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
         }
         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
         aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
  
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
                                                         &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
     }
  
     lstm_params_info.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
                                              descriptor.m_ForgetIntermediateScale,
                                              descriptor.m_CellIntermediateScale,
                                              descriptor.m_OutputIntermediateScale);
  
     lstm_params_info.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
  
     for (unsigned int i = 0; i != maxTime; ++i)
     {
  
         // Set LSTM input and output ITensors depending on:
         // input format (timeMajor) & number of LSTM batches (maxTime).
         arm_compute::ITensorInfo* outputLSTM;
         arm_compute::ITensorInfo* inputLSTM;
  
         // If there is only one LSTM time major batch, we will not concat OR permute.
         // Set input of LSTM to be first input ITensor.
         // Set output of LSTM to be final output ITensor.
         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
         if (maxTime == 1 && descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
             TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
  
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
  
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
  
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
  
             const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
             outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
         }
         // If there is only one LSTM batch major batch, we will not concat, only permute.
         // Set input of LSTM to be output of initial permute.
         // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
         // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
         else if (maxTime == 1 && !descriptor.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = &aclPermuteOutInfo;
  
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
         // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
         else
         {
             inputLSTM = splitterOutputsTensorInfosPtr[i];
             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
         }
  
         statusLSTM = arm_compute::NEQLSTMLayer::validate(inputLSTM,
                                                          &aclInputToForgetWeightsInfo,
                                                          &aclInputToCellWeightsInfo,
                                                          &aclInputToOutputWeightsInfo,
                                                          &aclRecurrentToForgetWeightsInfo,
                                                          &aclRecurrentToCellWeightsInfo,
                                                          &aclRecurrentToOutputWeightsInfo,
                                                          &aclForgetGateBiasInfo,
                                                          &aclCellBiasInfo,
                                                          &aclOutputGateBiasInfo,
                                                          &aclCellStateInInfo,
                                                          &aclOutputStateInInfo,
                                                          &aclCellStateOutInfo,
                                                          &aclOutputStateOutInfo,
                                                          outputLSTM,
                                                          lstm_params_info);
     }
  
     //
     // Concat validate
     //
  
     // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
     TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
  
     TensorInfo concatOutputTensorInfo = TensorInfo(output);
     concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
     arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
  
     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
     {
         for (unsigned int i = 0; i < maxTime; ++i)
         {
             auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
             concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
         }
  
         unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
         if (!descriptor.m_TimeMajor)
         {
             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclConcatOutputTensorInfo,
                                                                      aclAxisConcat);
         }
         else
         {
             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
                                                                      &aclOutputInfo,
                                                                      aclAxisConcat);
         }
     }
     // If only one LSTM batch, we do not concat and/or permute.
     // Must ensure final output info is expanded to correct batch major dimensions.
     else
     {
         if (!descriptor.m_TimeMajor)
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandBatchMajor));
         }
         else
         {
             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
                 BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }
     }
  
     //
     // Permute validate
     //
     if (!descriptor.m_TimeMajor)
     {
         // Output now time major. Permute output back to batch major.
         if (maxTime != 1)
         {
             statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
         else
         {
             statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }
     }
  
     auto okCode = arm_compute::ErrorCode::OK;
     if (statusPermute1.error_code() == okCode &&
         statusSplit.error_code()    == okCode &&
         statusLSTM .error_code()    == okCode &&
         statusConcat.error_code()   == okCode &&
         statusPermute2.error_code() == okCode)
     {
         return arm_compute::Status(arm_compute::ErrorCode::OK,
                                    "All Unidirectional Sequence LSTM layer validate status OK.");
     }
     else
     {
         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                    "Unidirectional Sequence LSTM layer validate status failed.");
     }
 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NextIndex()

bool armnn::NextIndex	(	const unsigned int	numDims,
		const armnn::TensorShape &	dims,
		std::vector< unsigned int > &	current
	)

Definition at line 19 of file Reduce.cpp.

 {
     unsigned int carry = 1;
  
     for (unsigned int idx = numDims; idx-- > 0; )
     {
         unsigned int current_val = current[idx] + carry;
         if (dims[idx] == current_val)
         {
             current[idx] = 0;
         }
         else
         {
             current[idx] = current_val;
             carry = 0;
             break;
         }
     }
     return (carry == 0);
 }

Referenced by Reduce().

◆ NonMaxSuppression()

std::vector< unsigned int > NonMaxSuppression	(	unsigned int	numBoxes,
		const std::vector< float > &	boxCorners,
		const std::vector< float > &	scores,
		float	nmsScoreThreshold,
		unsigned int	maxDetection,
		float	nmsIouThreshold
	)

Definition at line 50 of file DetectionPostProcess.cpp.

 {
     // Select boxes that have scores above a given threshold.
     std::vector<float> scoresAboveThreshold;
     std::vector<unsigned int> indicesAboveThreshold;
     for (unsigned int i = 0; i < numBoxes; ++i)
     {
         if (scores[i] >= nmsScoreThreshold)
         {
             scoresAboveThreshold.push_back(scores[i]);
             indicesAboveThreshold.push_back(i);
         }
     }
  
     // Sort the indices based on scores.
     unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
     std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
     TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
  
     // Number of output cannot be more than max detections specified in the option.
     unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
     std::vector<unsigned int> outputIndices;
     std::vector<bool> visited(numAboveThreshold, false);
  
     // Prune out the boxes with high intersection over union by keeping the box with higher score.
     for (unsigned int i = 0; i < numAboveThreshold; ++i)
     {
         if (outputIndices.size() >= numOutput)
         {
             break;
         }
         if (!visited[sortedIndices[i]])
         {
             outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
             for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
             {
                 unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
                 unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
                 if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
                 {
                     visited[sortedIndices[j]] = true;
                 }
             }
         }
     }
     return outputIndices;
 }

References GenerateRangeK(), IntersectionOverUnion(), and TopKSort().

Referenced by DetectionPostProcess().

◆ numeric_cast() [1/9]

std::enable_if_t< std::is_unsigned<Source>::value && std::is_unsigned<Dest>::value, Dest> armnn::numeric_cast ( Source source )

Definition at line 35 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (source > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to "
                                         "narrower unsigned type. Overflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(source);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [2/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source source )

Definition at line 58 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (source > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
                                         "Overflow detected.");
     }
  
     if (source < std::numeric_limits<Dest>::lowest())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
                                         "Underflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(source);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [3/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source source )

Definition at line 83 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (source > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
                                         "Overflow detected.");
     }
  
     if (source < std::numeric_limits<Dest>::lowest())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
                                         "Underflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(source);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [4/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source source )

Definition at line 109 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (source > static_cast<Source>(std::numeric_limits<Dest>::max()))
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
                                         "Overflow detected.");
     }
  
     if (source < static_cast<Source>(std::numeric_limits<Dest>::lowest()))
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
                                         "Underflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(source);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [5/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source source )

Definition at line 135 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     Dest sourceConverted = static_cast<Dest>(source);
  
     if (sourceConverted > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
                                         "Overflow detected.");
     }
  
     if (sourceConverted < std::numeric_limits<Dest>::lowest())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
                                         "Underflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(source);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [6/9]

std::enable_if_t< std::is_signed<Dest>::value && std::is_integral<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source sValue )

Definition at line 165 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (sValue > static_cast< typename std::make_unsigned<Dest>::type >(std::numeric_limits<Dest>::max()))
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to signed type. "
                                         "Overflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(sValue);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [7/9]

std::enable_if_t< std::is_floating_point<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source sValue )

Definition at line 184 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (static_cast<Dest>(sValue) > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to floating point type. "
                                         "Overflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
  
     return static_cast<Dest>(sValue);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [8/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_signed<Source>::value && std::is_integral<Source>::value, Dest> armnn::numeric_cast ( Source sValue )

Definition at line 206 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (sValue < 0)
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
                                         "Underflow detected.");
     }
  
     if (static_cast< typename std::make_unsigned<Source>::type >(sValue) > std::numeric_limits<Dest>::max())
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to unsigned type. "
                                         "Overflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
     return static_cast<Dest>(sValue);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [9/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_floating_point<Source>::value, Dest> armnn::numeric_cast ( Source sValue )

Definition at line 230 of file NumericCast.hpp.

 {
 #if ENABLE_NUMERIC_CAST_CHECKS
     if (sValue < 0)
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
                                         "Underflow detected.");
     }
  
     if (sValue > static_cast<Source>(std::numeric_limits<Dest>::max()))
     {
         ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to unsigned type. "
                                         "Overflow detected.");
     }
 #endif // ENABLE_NUMERIC_CAST_CHECKS
     return static_cast<Dest>(sValue);
 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ Offset()

unsigned int armnn::Offset	(	const TensorShape &	shape,
		unsigned int	batch,
		unsigned int	height,
		unsigned int	width,
		unsigned int	channels,
		const DataLayoutIndexed &	dataLayout
	)

Definition at line 15 of file BatchToSpaceNd.cpp.

 {
     // 3D Tensors
     unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
     if (shape.GetNumDimensions() == 3)
     {
         return (batch * shape[dataLayout.GetHeightIndex()] + height) * shape[channelDimension3D] + channels;
     }
     // 4D Tensors
     else if (shape.GetNumDimensions() == 4)
     {
         if (dataLayout.GetDataLayout() == DataLayout::NHWC)
         {
             return ((batch * shape[dataLayout.GetHeightIndex()] + height) *
                     shape[dataLayout.GetWidthIndex()] + width) *
                     shape[dataLayout.GetChannelsIndex()] + channels;
         }
         else
         {
             return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
                     shape[dataLayout.GetHeightIndex()] + height) *
                     shape[dataLayout.GetWidthIndex()] + width;
         }
     }
     else
     {
         throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
     }
 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by BatchToSpaceNd().

◆ operator<<() [1/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const armnn::TensorShape &	shape
	)

inline

Definition at line 335 of file TypesUtils.hpp.

 {
     os << "[";
     if (shape.GetDimensionality() != Dimensionality::NotSpecified)
     {
         for (uint32_t i = 0; i < shape.GetNumDimensions(); ++i)
         {
             if (i != 0)
             {
                 os << ",";
             }
             if (shape.GetDimensionSpecificity(i))
             {
                 os << shape[i];
             }
             else
             {
                 os << "?";
             }
         }
     }
     else
     {
         os << "Dimensionality Not Specified";
     }
     os << "]";
     return os;
 }

References TensorShape::GetDimensionality(), TensorShape::GetDimensionSpecificity(), TensorShape::GetNumDimensions(), and NotSpecified.

◆ operator<<() [2/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const BackendId &	id
	)

inline

Definition at line 176 of file BackendId.hpp.

 {
     os << id.Get();
     return os;
 }

◆ operator<<() [3/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const BackendVersion &	backendVersion
	)

inline

Definition at line 70 of file IBackendInternal.hpp.

 {
     os << "[" << backendVersion.m_Major << "." << backendVersion.m_Minor << "]";
  
     return os;
 }

References BackendVersion::m_Major, and BackendVersion::m_Minor.

◆ operator<<() [4/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const BFloat16 &	b
	)

inline

Definition at line 122 of file BFloat16.hpp.

 {
     os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
     return os;
 }

References BFloat16::ToFloat32(), and BFloat16::Val().

◆ operator<<() [5/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const Compute &	compute
	)

inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 69 of file BackendId.hpp.

 {
     os << GetComputeDeviceAsCString(compute);
     return os;
 }

References GetComputeDeviceAsCString().

◆ operator<<() [6/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const std::set< Compute > &	compute
	)

inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 58 of file BackendId.hpp.

 {
     for (const Compute& comp : compute)
     {
         os << GetComputeDeviceAsCString(comp) << " ";
     }
     return os;
 }

References GetComputeDeviceAsCString().

◆ operator<<() [7/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const std::vector< Compute > &	compute
	)

inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 47 of file BackendId.hpp.

 {
     for (const Compute& comp : compute)
     {
         os << GetComputeDeviceAsCString(comp) << " ";
     }
     return os;
 }

References GetComputeDeviceAsCString().

◆ operator<<() [8/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		const TContainer< BackendId, TContainerTemplateArgs... > &	ids
	)

Definition at line 183 of file BackendId.hpp.

 {
     os << '[';
     for (const auto& id : ids) { os << id << " "; }
     os << ']';
     return os;
 }

◆ operator<<() [9/9]

std::ostream& armnn::operator<<	(	std::ostream &	os,
		Status	stat
	)

inline

Definition at line 328 of file TypesUtils.hpp.

 {
     os << GetStatusAsCString(stat);
     return os;
 }

References GetStatusAsCString().

◆ Optimize() [1/4]

IOptimizedNetworkPtr Optimize	(	const Graph &	inGraph,
		const std::vector< BackendId > &	backendPreferences,
		const IDeviceSpec &	deviceSpec,
		const OptimizerOptions &	options,
		Optional< std::vector< std::string > & >	messages = `EmptyOptional()`
	)

Accept legacy OptimizerOptions.

Definition at line 1883 of file Network.cpp.

 {
     return Optimize(inGraph,
                     backendPreferences,
                     deviceSpec,
                     OptimizerOptionsOpaque(options),
                     messages);
 }

References Optimize().

◆ Optimize() [2/4]

IOptimizedNetworkPtr Optimize	(	const Graph &	inGraph,
		const std::vector< BackendId > &	backendPreferences,
		const IDeviceSpec &	deviceSpec,
		const OptimizerOptionsOpaque &	options,
		Optional< std::vector< std::string > & >	messages = `EmptyOptional()`
	)

Create an optimized version of the network.

Parameters

inGraph	Graph to be optimized.
backendPreferences	The choice of the backend ordered by user preferences.
deviceSpec	DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages	If there are failures or warnings, a string describing each one is added to this vector.
options	OptimizerOptionsOpaque object with optimizer configuration options.

Returns: An IOptimizedNetworkPtr interface to the optimized network. Throws an exception derived from armnn::Exception if the process fails.

Definition at line 1896 of file Network.cpp.

 {
     // Optimizes a copy of inGraph for the given backend preferences and returns it wrapped in an
     // IOptimizedNetworkPtr. Invalid arguments, failed backend assignment and failed backend-specific
     // optimization throw InvalidArgumentException; a failed tensor handle strategy selection returns
     // a null IOptimizedNetworkPtr instead (see the end of this function).
     ARMNN_LOG(debug) << options.ToString();
  
     // Enable profiling
     auto profiler = inGraph.GetProfiler();
     ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
     profiler->EnableProfiling(options.GetProfilingEnabled());
  
     // Some backends don't play well together. Check here before continuing.
     {
         std::set<BackendId> backendSet(backendPreferences.begin(), backendPreferences.end());
         // GpuFsa cannot co-exist with GpuAcc.
         if (backendSet.find("GpuFsa") != backendSet.end() &&
             backendSet.find("GpuAcc") != backendSet.end())
         {
             throw InvalidArgumentException("The backends \"GpuAcc\" and \"GpuFsa\" cannot be specified "
                                            "for the same optimized network.");
         }
     }
  
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
     // At least one backend preference is required.
     if (backendPreferences.empty())
     {
         throw InvalidArgumentException("Invoked Optimize with no backends specified");
     }
  
     // The ReduceFp32ToBf16 option is rejected outright.
     if (options.GetReduceFp32ToBf16())
     {
         throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
                                        "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
     }
  
     // NOTE(review): GetReduceFp32ToBf16() returning true already throws just above, so this
     // combined check looks unreachable unless the getter's result changes between calls - confirm.
     if (options.GetReduceFp32ToFp16() && options.GetReduceFp32ToBf16())
     {
         throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
     }
  
     // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
     inGraph.VerifyConstantLayerSetTensorInfo();
  
     // Work on a copy so that the caller's graph is not modified by the passes below.
     std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
  
     // We need to pass on the information about whether import and export is enabled to the LoadNetwork phase.
     // The mechanism to do that is to add model options to the optimized network.
     armnn::BackendOptions importExport("Global",
                                         {{"ImportEnabled", options.GetImportEnabled()},
                                          {"ExportEnabled", options.GetExportEnabled()}});
     ModelOptions optimizedOptions(options.GetModelOptions());
     optimizedOptions.push_back(importExport);
  
     // The optimized network takes ownership of the copied graph.
     auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
                                        &IOptimizedNetwork::Destroy);
  
     IOptimizedNetwork* optNetObjPtr = optNet.get();
  
     // Get the optimized graph
     Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
  
     if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::InferAndValidate)
     {
         // Infer the tensor infos for all output slots. Throws an exception on failure
         optGraph.InferTensorInfos();
     }
  
     // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation
     using namespace optimizations;
     Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
  
     Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
  
     if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::ValidateOnly)
     {
         // Validate the tensor infos for all output slots. Throws an exception on failure
         optGraph.InferTensorInfos();
     }
  
  
     // Group Constant Layer optimizations together where possible.
     // This is important as:
     // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
     // FuseBatchNormIntoDepthwiseConvolution2D.
     // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d
     Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
                                                 ConvertConstDequantisationLayersToConstLayers()));
     // Perform optimisation passes
     Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                 SquashEqualTransposeSiblings(),
                                                 SquashEqualReshapeSiblings(),
                                                 OptimizeInversePermutes(),
                                                 OptimizeInverseTransposes(),
                                                 MovePermuteUp(),
                                                 MoveTransposeUp(),
                                                 PermuteAsReshape(),
                                                 TransposeAsReshape(),
                                                 OptimizeConsecutiveReshapes(),
                                                 FoldPadIntoConvolution2d(),
                                                 FoldPadIntoDepthwiseConvolution2d(),
                                                 FoldPadIntoPooling2d(),
                                                 BroadcastToOptimizationLayer(),
                                                 PermuteAndBatchToSpaceAsDepthToSpace(),
                                                 TransposeAndBatchToSpaceAsDepthToSpace(),
                                                 FuseBatchNormIntoConvolution2DFloat32(),
                                                 FuseBatchNormIntoConvolution2DFloat16(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
  
     // Initialize backend settings
     BackendSettings backendSettings(backendPreferences, deviceSpec);
     auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
     if (availablePreferredBackends.empty())
     {
         // Report the failure through the optional messages vector and as an exception.
         std::stringstream failureMsg;
         failureMsg << "None of the preferred backends " << backendPreferences
                    << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
         ReportError(failureMsg.str(), messages);
         throw InvalidArgumentException(failureMsg.str());
     }
  
     // Create a map to temporarily hold initialized backend objects
     TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
     BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
  
     // The Fp32 to Fp16 conversion passes only run when CheckFp16Support() reports support.
     if (options.GetReduceFp32ToFp16())
     {
         bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
         if (hasFp16)
         {
             ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
             Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
             Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
         }
     }
  
     // Assign an available backend to each layer
     Graph::Iterator firstLayer = optGraph.begin();
     Graph::Iterator lastLayer  = optGraph.end();
     OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
                                                              backendSettings,
                                                              firstLayer,
                                                              lastLayer,
                                                              messages);
     if (assignBackendsResult.m_Error)
     {
         // Failed to assign a backend to each layer
         throw InvalidArgumentException("Failed to assign a backend to each layer");
     }
  
     Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                 OptimizeInverseConversionsFp32()));
  
     // Apply the backend-specific optimizations
     OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
                                                                              backendSettings,
                                                                              backends,
                                                                              options.GetModelOptions(),
                                                                              messages);
     if (backendOptimizationResult.m_Error)
     {
         // Failed to apply the backend-specific optimizations
         throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
     }
  
     // Convert constants
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
     }
  
     // This must occur after all topological changes to the graph and any redirection of variables
     // If the debug flag is set, then insert a DebugLayer after each layer
     // Doing this after applying the backend optimizations as they might have changed some layers
     if (options.GetDebugEnabled() && !options.GetDebugToFileEnabled())
     {
         Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
     }
     else if (options.GetDebugToFileEnabled())
     {
         // Setup the output file path
         try
         {
 #if !defined(ARMNN_DISABLE_FILESYSTEM)
             auto result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
             ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
 #endif
             Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
         }
         catch (const armnn::RuntimeException& e)
         {
             // If we cannot create the output directory then we'll issue a warning and continue.
             ARMNN_LOG(warning) << "Unable to print intermediate layer outputs : " << e.what();
         }
     }
  
     // Calculate the compatibility strategies for tensor handles
     OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                    backends,
                                                                    tensorHandleFactoryRegistry,
                                                                    options.GetImportEnabled(),
                                                                    options.GetExportEnabled(),
                                                                    messages);
  
     if (strategyResult.m_Error)
     {
         // Failed to select the tensor handle strategies. Unlike the failures above, this path
         // returns a null IOptimizedNetworkPtr rather than throwing.
         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
     }
  
     // Based on the tensor handle strategy determined above, insert copy layers where required.
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
         optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
     }
  
     return optNet;
 }

◆ Optimize() [3/4]

IOptimizedNetworkPtr Optimize	(	const INetwork &	network,
		const std::vector< BackendId > &	backendPreferences,
		const IDeviceSpec &	deviceSpec,
		const OptimizerOptions &	options,
		Optional< std::vector< std::string > & >	messages = `EmptyOptional()`
	)

Accept legacy OptimizerOptions.

Definition at line 2119 of file Network.cpp.

 {
     return Optimize(inNetwork,
                     backendPreferences,
                     deviceSpec,
                     OptimizerOptionsOpaque(options),
                     messages);
 }

References Optimize().

◆ Optimize() [4/4]

IOptimizedNetworkPtr Optimize	(	const INetwork &	network,
		const std::vector< BackendId > &	backendPreferences,
		const IDeviceSpec &	deviceSpec,
		const OptimizerOptionsOpaque &	options = `OptimizerOptionsOpaque()`,
		Optional< std::vector< std::string > & >	messages = `EmptyOptional()`
	)

Create an optimized version of the network.

Parameters

network	INetwork description of the network to be optimized.
backendPreferences	The choice of the backend ordered by user preferences.
deviceSpec	DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages	If there are failures or warnings, a string describing each one is added to this vector.
options	OptimizerOptionsOpaque object with optimizer configuration options.

Returns: An IOptimizedNetworkPtr interface to the optimized network. Throws an exception derived from armnn::Exception if the process fails.

Examples: AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, DynamicSample.cpp, and SimpleSample.cpp.

Definition at line 2132 of file Network.cpp.

 {
     return Optimize(inNetwork.pNetworkImpl->GetGraph(),
                     backendPreferences,
                     deviceSpec,
                     options,
                     messages);
 }

References INetwork::pNetworkImpl.

Referenced by Optimize(), ArmnnDriverImpl::PrepareArmnnModel(), ArmnnDriverImpl::PrepareArmnnModelFromCache(), ParserPrototxtFixture< TParser >::Setup(), and ParserPrototxtFixture< TParser >::SetupOptimizedNetwork().

◆ Pad()

void Pad	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const ITensorHandle *	inputHandle,
		ITensorHandle *	outputHandle,
		const PadQueueDescriptor &	data
	)

Definition at line 39 of file Pad.cpp.

 {
     auto padList  = data.m_Parameters.m_PadList;
     auto padValue = data.m_Parameters.m_PadValue;
  
     unsigned int numOutputElements = outputInfo.GetNumElements();
  
     TensorShape outputShape = outputInfo.GetShape();
     TensorShape inputShape  = inputInfo.GetShape();
  
     unsigned int numInputDimensions = inputShape.GetNumDimensions();
  
 #ifndef NDEBUG
  
     unsigned int numOutputDimensions = outputShape.GetNumDimensions();
     assert(numInputDimensions == numOutputDimensions);
  
 #endif
  
     unsigned int inputBatches  = 0;
     unsigned int inputChannels = 0;
     unsigned int inputHeight   = 0;
     unsigned int inputWidth    = 0;
  
     unsigned int outputChannels = 0;
     unsigned int outputHeight   = 0;
     unsigned int outputWidth    = 0;
  
     auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
     auto outData   = MakeEncoder<float>(outputInfo, outputHandle->Map());
  
     // Fill the output tensor with Pad value first
     if (outputInfo.IsQuantized())
     {
         // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
         auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
         auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
         FillOutputWithPadValue(*outputData, padValue, numOutputElements);
     }
     else
     {
         FillOutputWithPadValue(*outData, padValue, numOutputElements);
     }
  
     Decoder<float>& input  = *inputData;
     Encoder<float>& output = *outData;
  
     switch(numInputDimensions) {
  
         case 1:
             inputWidth = inputShape[0];
             for (unsigned int w = 0; w < inputWidth ; w++)
             {
                 input[w];
                 auto inputValue = input.Get();
                 auto outputIndex = w + std::get<0>(padList[0]);
                 output[outputIndex];
                 output.Set(inputValue);
             }
  
             break;
         case 2  :
             inputHeight = inputShape[0];
             inputWidth  = inputShape[1];
             outputWidth = outputShape[1];
  
             for (unsigned int h = 0; h < inputHeight; h++)
             {
                 for (unsigned int w = 0; w < inputWidth ; w++)
                 {
                     input[h * inputWidth + w];
                     auto inputValue  = input.Get();
                     auto outputIndex = (h + std::get<0>(padList[0])) * outputWidth + (w + std::get<0>(padList[1]));
                     output[outputIndex];
                     output.Set(inputValue);
                 }
             }
  
             break;
         case 3  :
             inputChannels = inputShape[0];
             inputHeight   = inputShape[1];
             inputWidth    = inputShape[2];
             outputHeight  = outputShape[1];
             outputWidth   = outputShape[2];
  
             for (unsigned int c = 0; c < inputChannels; c++)
             {
                 for (unsigned int h = 0; h < inputHeight; h++)
                 {
                     for (unsigned int w = 0; w < inputWidth ; w++)
                     {
                         input[c * inputHeight * inputWidth + h * inputWidth + w];
                         auto inputValue  = input.Get();
                         auto outputIndex = (c + std::get<0>(padList[0])) * outputHeight * outputWidth
                                            + (h + std::get<0>(padList[1])) * outputWidth
                                            + (w + std::get<0>(padList[2]));
                         output[outputIndex];
                         output.Set(inputValue);
                     }
                 }
             }
  
             break;
         case 4  :
             inputBatches   = inputShape[0];
             inputChannels  = inputShape[1];
             inputHeight    = inputShape[2];
             inputWidth     = inputShape[3];
             outputChannels = outputShape[1];
             outputHeight   = outputShape[2];
             outputWidth    = outputShape[3];
  
             for (unsigned int b = 0; b < inputBatches; b++)
             {
                 for (unsigned int c = 0; c < inputChannels; c++)
                 {
                     for (unsigned int h = 0; h < inputHeight; h++)
                     {
                         for (unsigned int w = 0; w < inputWidth ; w++)
                         {
                             input[b * inputChannels * inputHeight * inputWidth
                                       + c * inputHeight * inputWidth
                                       + h * inputWidth
                                       + w];
                             auto inputValue  = input.Get();
                             auto outputIndex = (b + std::get<0>(padList[0]))
                                                * outputChannels * outputHeight * outputWidth
                                                + (c + std::get<0>(padList[1])) * outputHeight * outputWidth
                                                + (h + std::get<0>(padList[2])) * outputWidth
                                                + (w + std::get<0>(padList[3]));
                             output[outputIndex];
                             output.Set(inputValue);
                         }
                     }
                 }
             }
  
             break;
         default :
             break;
     }
 }

References Decoder< IType >::Get(), TensorInfo::GetDataType(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), TensorInfo::IsQuantized(), PadDescriptor::m_PadList, PadDescriptor::m_PadValue, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), and Encoder< IType >::Set().

◆ ParseBooleanBackendOption()

bool armnn::ParseBooleanBackendOption	(	const armnn::BackendOptions::Var &	value,
		bool	defaultValue
	)

inline

Definition at line 312 of file BackendOptions.hpp.

 {
     if (value.IsBool())
     {
         return value.AsBool();
     }
     return defaultValue;
 }

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

◆ ParseComputeDevice()

constexpr armnn::Compute armnn::ParseComputeDevice ( const char * str )

constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 213 of file TypesUtils.hpp.

 {
     if (armnn::StrEqual(str, "CpuAcc"))
     {
         return armnn::Compute::CpuAcc;
     }
     else if (armnn::StrEqual(str, "CpuRef"))
     {
         return armnn::Compute::CpuRef;
     }
     else if (armnn::StrEqual(str, "GpuAcc"))
     {
         return armnn::Compute::GpuAcc;
     }
     else
     {
         return armnn::Compute::Undefined;
     }
 }

References CpuAcc, CpuRef, GpuAcc, StrEqual(), and Undefined.

◆ ParseIntBackendOption()

int armnn::ParseIntBackendOption	(	const armnn::BackendOptions::Var &	value,
		int	defaultValue
	)

inline

Definition at line 330 of file BackendOptions.hpp.

 {
     if (value.IsInt())
     {
         return value.AsInt();
     }
     return defaultValue;
 }

References BackendOptions::Var::AsInt(), and BackendOptions::Var::IsInt().

Referenced by ClBackendModelContext::ClBackendModelContext().

◆ ParseOptions()

void armnn::ParseOptions	(	const std::vector< BackendOptions > &	options,
		BackendId	backend,
		F	f
	)

Definition at line 297 of file BackendOptions.hpp.

 {
     for (auto optionsGroup : options)
     {
         if (optionsGroup.GetBackendId() == backend)
         {
             for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
             {
                 const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
                 f(option.GetName(), option.GetValue());
             }
         }
     }
 }

References BackendOptions::BackendOption::GetName(), and BackendOptions::BackendOption::GetValue().

Referenced by ClBackendContext::ClBackendContext(), ClBackendModelContext::ClBackendModelContext(), GpuFsaBackendContext::GpuFsaBackendContext(), NeonBackendModelContext::NeonBackendModelContext(), and RuntimeImpl::RuntimeImpl().

◆ ParseStringBackendOption()

std::string armnn::ParseStringBackendOption	(	const armnn::BackendOptions::Var &	value,
		std::string	defaultValue
	)

inline

Definition at line 321 of file BackendOptions.hpp.

 {
     if (value.IsString())
     {
         return value.AsString();
     }
     return defaultValue;
 }

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ ParseTuningLevel()

TuningLevel armnn::ParseTuningLevel	(	const BackendOptions::Var &	value,
		TuningLevel	defaultValue
	)

inline

Definition at line 26 of file ArmComputeTuningUtils.hpp.

 {
     if (value.IsInt())
     {
         int v = value.AsInt();
         if (v > static_cast<int>(TuningLevel::Exhaustive) ||
             v < static_cast<int>(TuningLevel::None))
         {
             ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
                                   "Using default(" << static_cast<int>(defaultValue) << ")";
         } else
         {
             return static_cast<TuningLevel>(v);
         }
     }
     return defaultValue;
 }

References ARMNN_LOG, BackendOptions::Var::AsInt(), Exhaustive, BackendOptions::Var::IsInt(), None, and warning.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ PermuteTensor()

armnn::ConstTensor PermuteTensor	(	const ConstTensorHandle *	tensor,
		const PermutationVector &	permutationVector,
		void *	permuteBuffer
	)

Definition at line 18 of file WorkloadUtils.cpp.

 {
     if (tensor == nullptr)
     {
         throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
     }
     if (permuteBuffer == nullptr)
     {
         throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
     }
  
     TensorInfo tensorInfo = tensor->GetTensorInfo();
  
     if (permutationVector.GetSize() > 0)
     {
         tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
         armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
                             tensor->GetConstTensor<void>(), permuteBuffer,
                             GetDataTypeSize(tensorInfo.GetDataType()));
     }
     else
     {
         ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
     }
     tensorInfo.SetConstant(true);
     return ConstTensor(tensorInfo, permuteBuffer);
 }

References ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), GetDataTypeSize(), TensorInfo::GetNumBytes(), TensorInfo::GetShape(), PermutationVector::GetSize(), ConstTensorHandle::GetTensorInfo(), armnnUtils::Permute(), armnnUtils::Permuted(), and TensorInfo::SetConstant().

Referenced by Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), and ConvertWeightTensorFromArmnnToAcl().

◆ PolymorphicDowncast()

DestType armnn::PolymorphicDowncast ( SourceType * value )

Polymorphic downcast for built-in pointers only.

Usage: Child* pChild = PolymorphicDowncast<Child*>(pBase);

Template Parameters

DestType	Pointer type to the target object (Child pointer type)
SourceType	Pointer type to the source object (Base pointer type)

Parameters

value Pointer to the source object

Returns: Pointer of type DestType (Pointer of type child)

Definition at line 74 of file PolymorphicDowncast.hpp.

 {
     static_assert(std::is_pointer<DestType>::value,
                   "PolymorphicDowncast only works with pointer types.");
  
     ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == value);
     return static_cast<DestType>(value);
 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

Referenced by ClLayerSupport::IsLayerSupported(), and IsLayerTypeSupported().

◆ PolymorphicPointerDowncast()

auto armnn::PolymorphicPointerDowncast ( const SourceType & value )

Polymorphic downcast for shared pointers and built-in pointers.

Usage: auto pChild = PolymorphicPointerDowncast<Child>(pBase)

Template Parameters

DestType	Type of the target object (Child type)
SourceType	Pointer type to the source object (Base (shared) pointer type)

Parameters

value Pointer to the source object

Returns: Pointer of type DestType ((Shared) pointer of type child)

Definition at line 93 of file PolymorphicDowncast.hpp.

 {
     ARMNN_POLYMORPHIC_CAST_CHECK(utility::DynamicPointerCast<DestType>(value)
                                  == value);
     return utility::StaticPointerCast<DestType>(value);
 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

◆ Pooling2d()

void Pooling2d	(	Decoder< float > &	rInputDecoder,
		Encoder< float > &	rOutputEncoder,
		const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const Pooling2dDescriptor &	params
	)

Computes the Pooling2d operation.

Definition at line 142 of file Pooling2d.cpp.

 {
     const DataLayoutIndexed dataLayout(params.m_DataLayout);
     auto channelsIndex = dataLayout.GetChannelsIndex();
     auto heightIndex = dataLayout.GetHeightIndex();
     auto widthIndex = dataLayout.GetWidthIndex();
  
     const int batchSize    = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
     const int channels     = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
     const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
     const int widthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
     const int heightInput  = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
     const int widthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
     const int padLeft      = armnn::numeric_cast<int>(params.m_PadLeft);
     const int padRight     = armnn::numeric_cast<int>(params.m_PadRight);
     const int padTop       = armnn::numeric_cast<int>(params.m_PadTop);
     const int padBottom    = armnn::numeric_cast<int>(params.m_PadBottom);
     const int strideX      = armnn::numeric_cast<int>(params.m_StrideX);
     const int strideY      = armnn::numeric_cast<int>(params.m_StrideY);
     const int poolHeight   = armnn::numeric_cast<int>(params.m_PoolHeight);
     const int poolWidth    = armnn::numeric_cast<int>(params.m_PoolWidth);
  
     float defaultInitializer = DefaultInitializer(params.m_PoolType);
  
     Accumulator accumulate = GetAccumulator(params.m_PoolType);
     Executor execute       = GetExecutor(params.m_PoolType);
  
     // Check supported padding methods outside the loop to simplify
     // the inner loop.
     if (params.m_PaddingMethod != PaddingMethod::Exclude &&
         params.m_PaddingMethod != PaddingMethod::IgnoreValue)
     {
         throw armnn::InvalidArgumentException("Unsupported padding type");
     }
  
     const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
  
     for (int n = 0; n < batchSize; n++)
     {
         for (int c = 0; c < channels; c++)
         {
             for (int yOutput = 0; yOutput < heightOutput; yOutput++)
             {
                 //  Calculate values independent of the x axis
                 int hstart = (yOutput * strideY) - padTop;
                 int hend = hstart + poolHeight;
                 // Clamp the pooling region inside the valid input area (which includes the padding).
                 // This is necessary because the final pooling in a row may overlap beyond the padding.
                 hend = std::min(hend, heightInput + padBottom);
  
                 int height = hend - hstart;
                 bool hclamped = ClampRange(hstart, hend, heightInput);
  
                 for (int xOutput = 0; xOutput < widthOutput; xOutput++)
                 {
                     int wstart = (xOutput * strideX) - padLeft;
                     int wend = wstart + poolWidth;
  
                     // Clamp the pooling region inside the valid input area (which includes the padding).
                     // This is necessary because the final pooling in a row may overlap beyond the padding.
                     wend = std::min(wend, widthInput + padRight);
  
                     float result = defaultInitializer;
                     float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));
  
                     // Special case: when the pooling kernel is over a padding region and the padding
                     //               size is larger or equal to the kernel and the kernel only covers
                     //               padding and no real values, then we initialize the result as zero
                     //               by convention. This is because we need to choose a value here and
                     //               all values we have are padding, which we ignore.
                     if (OnPaddingOnly(hstart, hend, heightInput) ||
                         OnPaddingOnly(wstart, wend, widthInput))
                     {
                         result = 0.0f;
  
                         int outputIndex;
  
                         if(dataLayout.GetDataLayout() == DataLayout::NHWC)
                         {
                             outputIndex = n * heightOutput * widthOutput * channels +
                                           yOutput * widthOutput * channels +
                                           xOutput * channels +
                                           c;
                         }
                         else
                         {
                             outputIndex = n * heightOutput * widthOutput * channels +
                                           c * heightOutput * widthOutput +
                                           yOutput * widthOutput +
                                           xOutput;
                         }
  
                         rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                         rOutputEncoder.Set(result);
                         continue;
                     }
  
                     bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);
  
                     if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
                     {
                         // When we exclude the padding, it means we calculate with a smaller
                         // kernel size, so I changed the divisor here.
                         poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
                     }
  
                     for (auto yInput = hstart; yInput < hend; yInput++)
                     {
                         for (auto xInput = wstart; xInput < wend; xInput++)
                         {
  
                             int inputIndex;
                             if(dataLayout.GetDataLayout() == DataLayout::NHWC)
                             {
                                 inputIndex = n * heightInput * widthInput * channels +
                                              yInput * widthInput * channels +
                                              xInput * channels +
                                              c;
  
                             }
                             else
                             {
                                 inputIndex = n * heightInput * widthInput * channels +
                                              c * heightInput * widthInput +
                                              yInput * widthInput +
                                              xInput;
                             }
  
                             accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
                         }
                     }
  
                     execute(result, poolAreaSize);
  
                     int outputIndex;
  
                     if(dataLayout.GetDataLayout() == DataLayout::NHWC)
                     {
                         outputIndex = n * heightOutput * widthOutput * channels +
                                       yOutput * widthOutput * channels +
                                       xOutput * channels +
                                       c;
                     }
                     else
                     {
                         outputIndex = n * heightOutput * widthOutput * channels +
                                       c * heightOutput * widthOutput +
                                       yOutput * widthOutput +
                                       xOutput;
                     }
  
                     rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                     rOutputEncoder.Set(result);
                 }
             }
         }
     }
 }

Referenced by Pooling2d(), and Pooling2dLayer::Pooling2dLayer().

◆ Pooling3d()

void Pooling3d	(	Decoder< float > &	rInputDecoder,
		Encoder< float > &	rOutputEncoder,
		const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const Pooling3dDescriptor &	params
	)

Computes the Pooling3d operation.

Definition at line 172 of file Pooling3d.cpp.

 {
     // Computes a 3D pooling of the decoded input and writes one value per output element.
     // The input is decoded once into a float vector; each output element is then produced by
     // accumulating over its depth x height x width window and finishing with execute().
     // Throws armnn::InvalidArgumentException when the padding method is neither Exclude nor
     // IgnoreValue.
     const DataLayoutIndexed dataLayout(params.m_DataLayout);
  
     auto channelsIndex = dataLayout.GetChannelsIndex();
  
     auto depthIndex = dataLayout.GetDepthIndex();
     auto heightIndex = dataLayout.GetHeightIndex();
     auto widthIndex = dataLayout.GetWidthIndex();
  
     // Batch size and channel count are read from the output shape.
     const int batchSize    = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
     const int channels     = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
  
     const int depthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
     const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
     const int widthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
  
     const int depthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
     const int heightInput  = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
     const int widthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
  
     const int padLeft      = armnn::numeric_cast<int>(params.m_PadLeft);
     const int padRight     = armnn::numeric_cast<int>(params.m_PadRight);
     const int padTop       = armnn::numeric_cast<int>(params.m_PadTop);
     const int padBottom    = armnn::numeric_cast<int>(params.m_PadBottom);
     const int padFront     = armnn::numeric_cast<int>(params.m_PadFront);
     const int padBack      = armnn::numeric_cast<int>(params.m_PadBack);
  
     const int strideX      = armnn::numeric_cast<int>(params.m_StrideX);
     const int strideY      = armnn::numeric_cast<int>(params.m_StrideY);
     const int strideZ      = armnn::numeric_cast<int>(params.m_StrideZ);
  
     const int poolHeight   = armnn::numeric_cast<int>(params.m_PoolHeight);
     const int poolWidth    = armnn::numeric_cast<int>(params.m_PoolWidth);
     const int poolDepth    = armnn::numeric_cast<int>(params.m_PoolDepth);
  
     // The starting value, the per-element accumulator and the finishing step are all
     // selected from the pool type.
     float defaultInitializer = DefaultInitializer(params.m_PoolType);
     Accumulator accumulate = GetAccumulator(params.m_PoolType);
     Executor execute       = GetExecutor(params.m_PoolType);
  
     // Check supported padding methods outside the loop to simplify
     // the inner loop.
     if (params.m_PaddingMethod != PaddingMethod::Exclude &&
         params.m_PaddingMethod != PaddingMethod::IgnoreValue)
     {
         throw armnn::InvalidArgumentException("Unsupported padding type");
     }
  
     const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
  
     for (int n = 0; n < batchSize; n++)
     {
         for (int c = 0; c < channels; c++)
         {
             for (int zOutput = 0; zOutput < depthOutput; zOutput++)
             {
                 //  Calculate values independent of the x and y axis
                 int dstart = (zOutput * strideZ) - padFront;
                 int dend = dstart + poolDepth;
                 // Clamp the pooling region inside the valid input area (which includes the padding).
                 // This is necessary because the final pooling in a row may overlap beyond the padding.
                 dend = std::min(dend, depthInput + padBack);
  
                 // 'depth' is taken before ClampRange and 'depthClamped' after it, so the two can
                 // differ whenever ClampRange adjusts dstart/dend. Statement order matters here.
                 int depth = dend - dstart;
                 bool dclamped = ClampRange(dstart, dend, depthInput);
                 int depthClamped = dend - dstart;
  
                 for (int yOutput = 0; yOutput < heightOutput; yOutput++)
                 {
                     int hstart = (yOutput * strideY) - padTop;
                     int hend = hstart + poolHeight;
                     // Clamp the pooling region inside the valid input area (which includes the padding).
                     // This is necessary because the final pooling in a row may overlap beyond the padding.
                     hend = std::min(hend, heightInput + padBottom);
  
                     // Same before/after pattern as for the depth axis.
                     int height = hend - hstart;
                     bool hclamped = ClampRange(hstart, hend, heightInput);
                     int heightClamped = hend - hstart;
  
                     for (int xOutput = 0; xOutput < widthOutput; xOutput++)
                     {
                         int wstart = (xOutput * strideX) - padLeft;
                         int wend = wstart + poolWidth;
                         // Clamp the pooling region inside the valid input area (which includes the padding).
                         // This is necessary because the final pooling in a row may overlap beyond the padding.
                         wend = std::min(wend, widthInput + padRight);
  
                         // Same before/after pattern as for the depth axis.
                         int width = wend - wstart;
                         bool wclamped = ClampRange(wstart, wend, widthInput);
                         int widthClamped = wend - wstart;
  
                         // Default divisor: the window volume measured before ClampRange.
                         float result = defaultInitializer;
                         float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
  
                         // Special case: when the pooling kernel is over a padding region and the padding
                         //               size is larger or equal to the kernel and the kernel only covers
                         //               padding and no real values, then we initialize the result as zero
                         //               by convention. This is because we need to choose a value here and
                         //               all values we have are padding, which we ignore.
                         if (OnPaddingOnly(dstart, dend, depthInput) ||
                             OnPaddingOnly(hstart, hend, heightInput) ||
                             OnPaddingOnly(wstart, wend, widthInput))
                         {
                             result = 0.0f;
  
                             int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                                 n, c, zOutput, yOutput, xOutput, dataLayout);
  
                             // Indexing positions the encoder; Set() then writes at that position.
                             rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                             rOutputEncoder.Set(result);
  
                             continue;
                         }
  
                         // Non-short-circuit OR of three already-computed flags.
                         bool clamped = (dclamped | hclamped | wclamped);
  
                         if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
                         {
                             // When we exclude the padding, it means we calculate with a smaller
                             // kernel size, so I changed the divisor here.
                             poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
                         }
  
                         // Accumulate over the window bounds left in place by ClampRange.
                         for (auto zInput = dstart; zInput < dend; zInput++)
                         {
                             for (auto yInput = hstart; yInput < hend; yInput++)
                             {
                                 for (auto xInput = wstart; xInput < wend; xInput++)
                                 {
  
                                     int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
                                 n, c, zInput, yInput, xInput, dataLayout);
  
                                     accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
                                 }
                             }
                         }
  
                         // Finish the accumulated value using the divisor chosen above.
                         execute(result, poolAreaSize);
  
                         int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                             n, c, zOutput, yOutput, xOutput, dataLayout);
  
                         rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                         rOutputEncoder.Set(result);
                     }
                 }
             }
         }
     }
 }

Referenced by Pooling3d(), and Pooling3dLayer::Pooling3dLayer().

◆ PreluImpl()

void PreluImpl	(	const TensorInfo &	inputInfo,
		const TensorInfo &	alphaInfo,
		const TensorInfo &	outputInfo,
		Decoder< float > &	inputData,
		Decoder< float > &	alphaData,
		Encoder< float > &	outputData
	)

Definition at line 13 of file PreluImpl.cpp.

 {
     const TensorShape& inputShape  = inputInfo.GetShape();
     const TensorShape& alphaShape  = alphaInfo.GetShape();
     const TensorShape& outputShape = outputInfo.GetShape();
  
     // PReLU activation: f(x) = alpha * x for x < 0, f(x) = x for x >= 0
     auto prelu = [](float x, float alpha)
     {
         return x < 0 ? alpha * x : x;
     };
  
     BroadcastLoop(inputShape, alphaShape, outputShape).Unroll(prelu, 0, inputData, alphaData, outputData);
 }

References TensorInfo::GetShape(), and BroadcastLoop::Unroll().

◆ PrintOutput()

void armnn::PrintOutput	(	const TensorInfo &	inputInfo,
		const T *	inputData,
		LayerGuid	guid,
		const std::string &	layerName,
		unsigned int	slotIndex,
		std::ostream &	os
	)

Definition at line 23 of file Debug.cpp.

 {
     const unsigned int numDims = inputInfo.GetNumDimensions();
     const unsigned int numElements = inputInfo.GetNumElements();
     const TensorShape& inputShape = inputInfo.GetShape();
  
     std::vector<unsigned int> strides(numDims, 0);
     strides[numDims - 1] = inputShape[numDims - 1];
  
     for (unsigned int i = 2; i <= numDims; i++)
     {
         strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
     }
  
     os << "{ ";
     os << "\"layerGuid\": " << guid << ", ";
     os << "\"layerName\": \"" << layerName << "\", ";
     os << "\"outputSlot\": " << slotIndex << ", ";
     os << "\"shape\": ";
  
     os << "[";
     for (unsigned int i = 0; i < numDims; i++)
     {
         os << inputShape[i];
         if (i != numDims - 1)
         {
             os << ", ";
         }
     }
     os << "], ";
  
     os << "\"min\": "
               << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
  
     os << "\"max\": "
               << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
  
     os << "\"data\": ";
  
     for (unsigned int i = 0; i < numElements; i++)
     {
         for (unsigned int j = 0; j < numDims; j++)
         {
             if (i % strides[j] == 0)
             {
                 os << "[";
             }
         }
  
         os << static_cast<float>(inputData[i]);
  
         for (unsigned int j = 0; j < numDims; j++)
         {
             if ((i + 1) % strides[j] == 0)
             {
                 os << "]";
             }
         }
  
         if (i != numElements - 1)
         {
             os << ", ";
         }
     }
  
     os << " }" << std::endl;
 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), and TensorInfo::GetShape().

◆ ProfilingUpdateDescriptions()

void armnn::ProfilingUpdateDescriptions	(	const std::string &	name,
		const DescriptorType &	desc,
		const WorkloadInfo &	infos,
		const arm::pipe::ProfilingGuid	guid
	)

inline

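Passes the layer details to the profiler currently held by the ProfilerManager, provided a profiler exists and profiling is enabled.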

Definition at line 180 of file Profiling.hpp.

 {
     IProfiler* profiler(ProfilerManager::GetInstance().GetProfiler()); ///< Profiler used
     if (profiler && profiler->IsProfilingEnabled())
     {
         profiler->AddLayerDetails(name, desc, infos, guid);
     }
 }

References ProfilerManager::GetInstance(), and IProfiler::IsProfilingEnabled().

◆ Quantize() [1/2]

template int32_t Quantize< int32_t >	(	float	value,
		float	scale,
		int32_t	offset
	)

Quantize a floating point value into a quantized integer data type (instantiated for int8_t, uint8_t, int16_t and int32_t).

Explicit instantiation of Quantize for int32_t.

Explicit instantiation of Quantize for int16_t.

Explicit instantiation of Quantize for uint8_t.

Explicit instantiation of Quantize for int8_t.

Parameters

value	- The value to quantize.
scale	- The scale (must be non-zero).
offset	- The offset.

Returns: - The quantized value calculated as round(value/scale)+offset, clamped to the representable range of the quantized type.

Definition at line 30 of file TypesUtils.cpp.

 {
     static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
     constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
     constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
     if (scale == 0.f)
     {
         throw armnn::InvalidArgumentException("Quantize: Scale cannot be 0.f");
     }
     if (std::isnan(value))
     {
         throw armnn::InvalidArgumentException("Quantize: Value is NaN");
     }
  
     float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
                                             static_cast<float>(min)), static_cast<float>(max));
     auto quantizedBits = static_cast<QuantizedType>(clampedValue);
  
     return quantizedBits;
 }

◆ Quantize() [2/2]

void armnn::Quantize	(	uint8_t *	quant,
		const float *	dequant,
		const TensorInfo &	info
	)

inline

Definition at line 121 of file RefWorkloadUtils.hpp.

 {
     for (size_t i = 0; i < info.GetNumElements(); i++)
     {
         quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
     }
 }

References info.

◆ Reduce()

void Reduce	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		Decoder< float > &	input,
		Encoder< float > &	output,
		const std::vector< uint32_t >	axis,
		const ReduceOperation	reduceOperation
	)

Definition at line 70 of file Reduce.cpp.

 {
     // Reduces the input over the dimensions listed in `axis` with the given reduceOperation.
     // Results are accumulated in a temporary float buffer (one entry per output element) and then
     // written to `output`. An empty `axis` means "reduce over every input dimension".
     // Throws armnn::InvalidArgumentException for an unrecognised reduceOperation.
     armnn::TensorShape inputDims = inputInfo.GetShape();
     unsigned int inputNumDims    = inputInfo.GetNumDimensions();
     unsigned int numOutputs      = outputInfo.GetNumElements();
  
     // Initialise temp output with the identity value of the chosen operation
     std::vector<float> tempOut(numOutputs);
     switch(reduceOperation)
     {
         case ReduceOperation::Mean:
         case ReduceOperation::Sum:
             std::fill(tempOut.begin(), tempOut.end(), 0.0f);
             break;
         case ReduceOperation::Prod:
             std::fill(tempOut.begin(), tempOut.end(), 1.0f);
             break;
         case ReduceOperation::Max:
             // Start from the most negative finite float so any input replaces it
             std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
             break;
         case ReduceOperation::Min:
             std::fill(tempOut.begin(), tempOut.end(), std::numeric_limits<float>::max());
             break;
         default:
             throw armnn::InvalidArgumentException("Unknown reduce method: " +
                 std::to_string(static_cast<int>(reduceOperation)));
     }
  
     // Initialise temp index (multi-dimensional position in the input, advanced by NextIndex)
     std::vector<unsigned int> tempIndex(inputNumDims, 0);
  
     // An empty axis list is expanded to all input dimensions
     std::vector<unsigned int> resolvedAxis = axis;
     if (resolvedAxis.empty())
     {
         for (unsigned int idx = 0; idx < inputNumDims; ++idx)
         {
             resolvedAxis.push_back(idx);
         }
     }
     auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
  
     // Iterates through input_data and operates over the reduced axis
     for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
     {
         // With no axes the offset is the flat input position; with the resolved axes those dimensions
         // are skipped, giving the position in the output
         unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
         unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
                                                         numResolvedAxis, resolvedAxis);
         // presumably operator[] positions the decoder on the element that Get() then reads - confirm
         input[inputOffset];
         auto inputValue = input.Get();
         switch(reduceOperation)
         {
             case ReduceOperation::Mean:
             case ReduceOperation::Sum:
                 // Mean accumulates a sum here; the division happens when writing the output
                 tempOut[outputOffset] += inputValue;
                 break;
             case ReduceOperation::Prod:
                 tempOut[outputOffset] *= inputValue;
                 break;
             case ReduceOperation::Max:
                 if (inputValue > tempOut[outputOffset])
                 {
                     tempOut[outputOffset] = inputValue;
                 }
                 break;
             case ReduceOperation::Min:
                 if (inputValue < tempOut[outputOffset])
                 {
                     tempOut[outputOffset] = inputValue;
                 }
                 break;
             default:
                 // Not reachable in practice: the initialisation switch above already throws for these values
                 throw armnn::InvalidArgumentException("Unknown reduce method: " +
                     std::to_string(static_cast<int>(reduceOperation)));
         }
     }
  
     // Takes average by num of elements added to get MEAN
     // numElementsInAxis is the product of the sizes of the reduced dimensions
     size_t numElementsInAxis = 1;
     for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
     {
         unsigned int current = inputDims[resolvedAxis[idx]];
         ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
                      (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
         numElementsInAxis *= current;
     }
  
     for (unsigned int idx = 0; idx < numOutputs; ++idx)
     {
         output[idx];
         if (reduceOperation == ReduceOperation::Mean)
         {
             // NOTE(review): when numElementsInAxis is 0 nothing is written for this output element
             if (numElementsInAxis > 0)
             {
                 output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
             }
         }
         else
         {
             output.Set(tempOut[idx]);
         }
     }
 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), Max, Mean, Min, NextIndex(), Prod, ReducedOutputOffset(), Encoder< IType >::Set(), and Sum.

◆ ReducedOutputOffset()

unsigned int armnn::ReducedOutputOffset	(	const unsigned int	numDims,
		const armnn::TensorShape &	dims,
		std::vector< unsigned int > &	index,
		const unsigned int	numAxis,
		const std::vector< unsigned int > &	axis
	)

Definition at line 40 of file Reduce.cpp.

 {
     unsigned int offset = 0;
     for (unsigned int idx = 0; idx < numDims; ++idx)
     {
         bool isAxis = false;
         if (!axis.empty())
         {
             for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
             {
                 if (idx == axis[axisIdx])
                 {
                     isAxis = true;
                     break;
                 }
             }
         }
         if (!isAxis)
         {
             offset = offset * dims[idx] + index[idx];
         }
     }
     return offset;
 }

Referenced by Reduce().

◆ RefBackendId()

constexpr const char* armnn::RefBackendId ( )

constexpr

Definition at line 10 of file RefBackendId.hpp.

10 { return "CpuRef"; }

Referenced by RefBackend::GetIdStatic().

◆ RefTensorHandleFactoryId()

constexpr const char* armnn::RefTensorHandleFactoryId ( )

constexpr

Definition at line 15 of file RefTensorHandleFactory.hpp.

15 { return "Arm/Ref/TensorHandleFactory"; }

Referenced by RefTensorHandleFactory::GetIdStatic().

◆ RemoveReshapeLayer()

void armnn::RemoveReshapeLayer	(	ReshapeLayer *	baseLayer,
		std::map< LayerGuid, Layer * > &	untouched,
		OptimizationViews &	optimizationViews
	)

inline

Definition at line 293 of file SubgraphUtils.hpp.

 {
     if (baseLayer == nullptr)
     {
         return;
     }
     ReshapeDescriptor reshapeDescriptor = baseLayer->GetParameters();
     Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
  
     // Cannot currently remove the Reshape if it's connected to an Input, Constant or Splitter
     if (parentLayer.GetType() == LayerType::Input || parentLayer.GetType() == LayerType::Constant)
     {
         return;
     }
  
     // Cannot currently remove the Reshape if it's connected to an OutputSlot or Concat
     for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
     {
         Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
  
         if (nextLayer.GetType() == LayerType::Output)
         {
             return;
         }
     }
     auto it = untouched.find(baseLayer->GetGuid());
     if (it == untouched.end())
     {
         // Already removed from map
         return;
     }
     untouched.erase(it);
  
     // Override the InputSlot TensorInfos for all the layers connected to the Reshape's OutputSlot
     for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
     {
         Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
         auto inputIndex = baseLayer->GetOutputSlot(0).GetConnection(i)->GetSlotIndex();
         TensorInfo reshapeInfo(baseLayer->GetOutputSlot(0).GetTensorInfo());
         reshapeInfo.SetShape(reshapeDescriptor.m_TargetShape);
         nextLayer.GetInputSlot(inputIndex).SetTensorInfo(reshapeInfo);
     }
     optimizationViews.AddDeletedSubgraph(baseLayer);
 }

References OptimizationViews::AddDeletedSubgraph(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetGuid(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), LayerWithParameters< Parameters >::GetParameters(), InputSlot::GetSlotIndex(), OutputSlot::GetTensorInfo(), Layer::GetType(), Input, ReshapeDescriptor::m_TargetShape, Output, TensorInfo::SetShape(), and InputSlot::SetTensorInfo().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ ReorderWeightChannelsForAcl()

ConstTensor armnn::ReorderWeightChannelsForAcl	(	const ConstTensor &	weightHandle,
		DataLayout	dataLayout,
		void *	permuteBuffer
	)

Definition at line 73 of file WorkloadUtils.cpp.

 {
     DataType* weight = static_cast<DataType*>(permuteBuffer);
     const TensorShape& weightShape = weightHandle.GetShape();
     unsigned int multiplier;
     unsigned int height;
     unsigned int width;
     unsigned int inputChannels;
     switch (dataLayout)
     {
         case DataLayout::NHWC:    //It actually is [ H, W, I, M ]
             height        = weightShape[0];
             width         = weightShape[1];
             inputChannels = weightShape[2];
             multiplier    = weightShape[3];
             break;
         case DataLayout::NCHW:    //It actually is [ M, I, H, W ]
         default:
             height        = weightShape[2];
             width         = weightShape[3];
             inputChannels = weightShape[1];
             multiplier    = weightShape[0];
             break;
     }
  
     std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
     unsigned int destinationWeightsChannel;
     unsigned int totalChannels = inputChannels * multiplier;
     unsigned int channelSize   = height * width;
     unsigned int inputChannel  = 0;
  
     for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
     {
         inputChannel = originWeightsChannel % inputChannels;
         destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;
  
         for (unsigned int i = 0; i < channelSize; i++)
         {
             weightAclOrder[i + destinationWeightsChannel * channelSize] =
                     weight[i + originWeightsChannel * channelSize];
         }
     }
  
     ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
     return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
 }

References BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetNumBytes(), BaseTensor< MemoryType >::GetShape(), NCHW, and NHWC.

◆ ReplaceLayers()

void armnn::ReplaceLayers	(	OptimizationViews &	optimizationViews,
		LayerType *	baseLayer,
		std::vector< IConnectableLayer * > &	layers
	)

Definition at line 345 of file ArmComputeSubgraphUtils.hpp.

 {
     std::list<IConnectableLayer*> replacementLayers(layers.begin(), layers.end());
  
     SubgraphView substitutionSubgraph(baseLayer);
     SubgraphView replacementSubgraph(std::move(replacementLayers),
                                      CreateIInputsFrom({replacementLayers.front()}),
                                      CreateIOutputsFrom({replacementLayers.back()}));
  
     optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
 }

References OptimizationViews::AddSubstitution().

◆ ReplaceMultipleLayers()

void armnn::ReplaceMultipleLayers	(	OptimizationViews &	optimizationViews,
		std::vector< IConnectableLayer * > &	originalLayers,
		LayerType *	baseLayer,
		const std::vector< SlotList >	inputLayersSlotLists,
		const std::vector< SlotList >	outputLayersSlotLists
	)

Definition at line 363 of file ArmComputeSubgraphUtils.hpp.

 {
     std::list<IConnectableLayer*> originalLayerList(originalLayers.begin(), originalLayers.end());
  
     SubgraphView substitutionSubgraph(
             std::move(originalLayerList),
             CreateIInputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, inputLayersSlotLists),
             CreateIOutputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, outputLayersSlotLists));
     SubgraphView replacementSubgraph(baseLayer);
  
     optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
 }

References OptimizationViews::AddSubstitution().

◆ ReportError()

void armnn::ReportError	(	const std::string &	errorMessage,
		Optional< std::vector< std::string > & >	errorMessages
	)

Definition at line 756 of file Network.cpp.

 {
     std::stringstream fullErrorMessage;
     fullErrorMessage << "ERROR: " << errorMessage;
     ARMNN_LOG(warning) << fullErrorMessage.str();
     if (errorMessages)
     {
         errorMessages.value().push_back(fullErrorMessage.str());
     }
 }

References ARMNN_LOG, and warning.

Referenced by AssignBackends(), CheckScaleSetOnQuantizedType(), Optimize(), and ReturnWithError().

◆ ReportUntouchedLayers()

void armnn::ReportUntouchedLayers	(	OptimizationViews &	optimizationViews,
		std::map< LayerGuid, Layer * >	untouched
	)

inline

Definition at line 220 of file SubgraphUtils.hpp.

 {
     std::vector<Layer*> untouchedVector;
     for (const auto& pair : untouched)
     {
         Layer* layer = pair.second;
         SubgraphView subgraphView({layer},
                                   CreateIInputsFrom({layer}),
                                   CreateIOutputsFrom({layer}));
         optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
     }
 }

References OptimizationViews::AddUntouchedSubgraph().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), ClBackend::OptimizeSubgraphView(), and GpuFsaBackend::OptimizeSubgraphView().

◆ ReportWarning()

void armnn::ReportWarning	(	const std::string &	warningMessage,
		Optional< std::vector< std::string > & >	warningMessages
	)

Definition at line 768 of file Network.cpp.

 {
     std::stringstream fullWarningMessage;
     fullWarningMessage << "WARNING: " << warningMessage;
     ARMNN_LOG(warning) << fullWarningMessage.str();
     if (warningMessages)
     {
         warningMessages.value().push_back(fullWarningMessage.str());
     }
 }

References ARMNN_LOG, and warning.

Referenced by ApplyBackendOptimizations(), and AttemptBackendAssignment().

◆ RequiresCopy()

bool armnn::RequiresCopy	(	ITensorHandleFactory::FactoryId	src,
		ITensorHandleFactory::FactoryId	dst,
		TensorHandleFactoryRegistry &	registry
	)

Definition at line 1458 of file Network.cpp.

 {
     if (src != dst)
     {
         ITensorHandleFactory* srcFactory = registry.GetFactory(src);
         ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
  
         if (srcFactory && dstFactory &&
             (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
         {
             return false;
         }
         return true;
     }
     return false;
 }

References ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), and ITensorHandleFactory::GetImportFlags().

Referenced by CalculateSlotOption().

◆ ReshapeWeightsForAcl()

void ReshapeWeightsForAcl	(	TensorInfo &	weightInfo,
		DataLayout	dataLayout
	)

Definition at line 47 of file WorkloadUtils.cpp.

 {
     // Reshape the weights in-place
     const TensorShape& weightShape = weightInfo.GetShape();
     switch (dataLayout)
     {
         case DataLayout::NHWC:
             // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
             weightInfo.SetShape({ 1,
                                   weightShape[0],
                                   weightShape[1],
                                   weightShape[2] * weightShape[3] });
             weightInfo.SetShape({ 1,
                                   weightShape[0] * weightShape[1],
                                   weightShape[2],
                                   weightShape[3] });
             break;
         case DataLayout::NCHW:
         default:
             // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W, ]
             weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
             break;
     }
 }

References TensorInfo::GetShape(), NCHW, NHWC, and TensorInfo::SetShape().

Referenced by ConvertWeightTensorFromArmnnToAcl(), and ConvertWeightTensorInfoFromArmnnToAcl().

◆ Resize()

void Resize	(	Decoder< float > &	in,
		const TensorInfo &	inputInfo,
		Encoder< float > &	out,
		const TensorInfo &	outputInfo,
		DataLayoutIndexed	dataLayout,
		ResizeMethod	resizeMethod,
		bool	alignCorners,
		bool	halfPixelCenters
	)

Definition at line 65 of file Resize.cpp.

 {
     // Resizes the input tensor to the output tensor's height/width using either bilinear or
     // nearest-neighbour sampling, one output element at a time over batch, channel, row and column.
     // Throws InvalidArgumentException for an unrecognised resizeMethod.
  
     // alignCorners and halfPixelCenters cannot both be true
     ARMNN_ASSERT(!(alignCorners && halfPixelCenters));
  
     // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
     // image is projected into the input image to figure out the interpolants and weights. Note that this
     // will yield different results than if projecting the centre of output texels.
  
     const unsigned int batchSize = inputInfo.GetShape()[0];
     const unsigned int channelCount = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
  
     const unsigned int inputHeight = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
     const unsigned int inputWidth = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
     const unsigned int outputHeight = outputInfo.GetShape()[dataLayout.GetHeightIndex()];
     const unsigned int outputWidth = outputInfo.GetShape()[dataLayout.GetWidthIndex()];
  
     // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
     // in the input image.
     const float scaleY = CalculateResizeScale(inputHeight, outputHeight, alignCorners);
     const float scaleX = CalculateResizeScale(inputWidth, outputWidth, alignCorners);
  
     const TensorShape& inputShape =  inputInfo.GetShape();
     const TensorShape& outputShape =  outputInfo.GetShape();
  
     for (unsigned int n = 0; n < batchSize; ++n)
     {
         for (unsigned int c = 0; c < channelCount; ++c)
         {
             for (unsigned int y = 0; y < outputHeight; ++y)
             {
                 // Corresponding real-valued height coordinate in input image.
                 float iy = PixelScaler(y, scaleY, halfPixelCenters, resizeMethod);
  
                 // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
                 // Nearest Neighbour with alignCorners rounds; every other combination floors.
                 const float fiy = (resizeMethod == ResizeMethod::NearestNeighbor && alignCorners) ? armnn::roundf(iy)
                                                                                                   : floorf(iy);
                 // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
                 const unsigned int y0 = static_cast<unsigned int>(std::max(fiy, 0.0f));
  
                 // Interpolation weight (range [0,1]).
                 const float yw = iy - fiy;
  
                 for (unsigned int x = 0; x < outputWidth; ++x)
                 {
                     // Real-valued and discrete width coordinates in input image.
                     float ix = PixelScaler(x, scaleX, halfPixelCenters, resizeMethod);
  
                     // Nearest Neighbour uses rounding to align to corners
                     const float fix = resizeMethod == ResizeMethod::NearestNeighbor && alignCorners ? armnn::roundf(ix)
                                                                                                     : floorf(ix);
                     // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
                     const unsigned int x0 = static_cast<unsigned int>(std::max(fix, 0.0f));
  
                     // Interpolation weight (range [0,1]).
                     const float xw = ix - fix;
  
                     // NOTE(review): inputWidth - 1u and inputHeight - 1u below wrap around if the
                     // corresponding input dimension is 0 - confirm callers never pass an empty image.
                     unsigned int x1;
                     unsigned int y1;
                     // Half Pixel Centers uses the scaling to compute a weighted parameter for nearby pixels
                     if (halfPixelCenters)
                     {
                         x1 = std::min(static_cast<unsigned int>(std::ceil(ix)), inputWidth - 1u);
                         y1 = std::min(static_cast<unsigned int>(std::ceil(iy)), inputHeight - 1u);
                     }
                     // Discrete width/height coordinates of texels below and to the right of (x0, y0).
                     else
                     {
                         x1 = std::min(x0 + 1, inputWidth - 1u);
                         y1 = std::min(y0 + 1, inputHeight - 1u);
                     }
  
                     float interpolatedValue;
                     switch (resizeMethod)
                     {
                         case ResizeMethod::Bilinear:
                         {
                             // Read the four neighbouring texels (y0/y1 rows, x0/x1 columns).
                             in[dataLayout.GetIndex(inputShape, n, c, y0, x0)];
                             float input1 = in.Get();
                             in[dataLayout.GetIndex(inputShape, n, c, y0, x1)];
                             float input2 = in.Get();
                             in[dataLayout.GetIndex(inputShape, n, c, y1, x0)];
                             float input3 = in.Get();
                             in[dataLayout.GetIndex(inputShape, n, c, y1, x1)];
                             float input4 = in.Get();
  
                             const float ly0 = Lerp(input1, input2, xw); // lerp along row y0.
                             const float ly1 = Lerp(input3, input4, xw); // lerp along row y1.
                             interpolatedValue = Lerp(ly0, ly1, yw);
                             break;
                         }
                         case ResizeMethod::NearestNeighbor:
                         {
                             // calculate euclidean distance to the 4 neighbours
                             // NOTE(review): the distances are measured from (fix, fiy), the already
                             // floored/rounded coordinates, not from (ix, iy) - confirm this is intended.
                             auto distance00 = EuclideanDistance(fix, fiy, x0, y0);
                             auto distance01 = EuclideanDistance(fix, fiy, x0, y1);
                             auto distance10 = EuclideanDistance(fix, fiy, x1, y0);
                             auto distance11 = EuclideanDistance(fix, fiy, x1, y1);
  
                             auto minimum = std::min( { distance00, distance01, distance10, distance11 } );
  
                             unsigned int xNearest = 0;
                             unsigned int yNearest = 0;
  
                             // Ties are resolved in the order (x0,y0), (x0,y1), (x1,y0), (x1,y1).
                             if (minimum == distance00)
                             {
                                xNearest = x0;
                                yNearest = y0;
                             }
                             else if (minimum == distance01)
                             {
                                 xNearest = x0;
                                 yNearest = y1;
                             }
                             else if (minimum == distance10)
                             {
                                 xNearest = x1;
                                 yNearest = y0;
                             }
                             else if (minimum == distance11)
                             {
                                 xNearest = x1;
                                 yNearest = y1;
                             }
                             else
                             {
                                 // Reached only if the minimum compares unequal to all four distances
                                 throw InvalidArgumentException("Resize Nearest Neighbor failure");
                             }
  
                             in[dataLayout.GetIndex(inputShape, n, c, yNearest, xNearest)];
                             interpolatedValue = in.Get();
                             break;
                         }
                         default:
                             throw InvalidArgumentException("Unknown resize method: " +
                                                             std::to_string(static_cast<int>(resizeMethod)));
                     }
                     // Write the result at the matching output position.
                     out[dataLayout.GetIndex(outputShape, n, c, y, x)];
                     out.Set(interpolatedValue);
                 }
             }
         }
     }
 }

References ARMNN_ASSERT, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Resize(), roundf(), and Encoder< IType >::Set().

Referenced by Resize(), and ResizeLayer::ResizeLayer().

◆ ReturnWithError()

OptimizationResult armnn::ReturnWithError	(	OptimizationResult	res,
		const Layer *	layer,
		const BackendSettings &	backendSettings,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 780 of file Network.cpp.

 {
     std::stringstream failureMsg;
     failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
     ReportError(failureMsg.str(), errMessages);
  
     res.m_Error = true;
     return res;
 }

References GetLayerTypeAsCString(), Layer::GetType(), OptimizationResult::m_Error, BackendSettings::m_PreferredBackends, and ReportError().

Referenced by AssignBackendsIConnectable(), and AttemptBackendAssignment().

◆ ReverseGetFlatIdx()

unsigned int armnn::ReverseGetFlatIdx	(	const std::vector< unsigned int > &	idxList,
		unsigned int	inputRank,
		std::vector< unsigned int > &	elementNumInner
	)

Definition at line 34 of file ReverseV2Impl.cpp.

 {
     unsigned int idx = 0;
  
     for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
     {
         idx += idxList[iDim] * elementNumInner[iDim];
     }
  
     return idx;
 }

Referenced by ReverseRelocateIdx().

◆ ReverseGetMultIdx()

std::vector<unsigned int> armnn::ReverseGetMultIdx	(	const unsigned int	idx,
		unsigned int	inputRank,
		std::vector< unsigned int > &	elementNumInner
	)

Definition at line 16 of file ReverseV2Impl.cpp.

 {
     std::vector<unsigned int> indexList(inputRank);
  
     unsigned int mIdx = idx;
  
     for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
     {
         indexList[iDim] = static_cast<unsigned int>(mIdx / elementNumInner[iDim]);
         mIdx %= elementNumInner[iDim];
     }
  
     return indexList;
 }

Referenced by ReverseRelocateIdx().

◆ ReverseRelocateIdx()

unsigned int armnn::ReverseRelocateIdx	(	unsigned int	idx,
		unsigned int	inputRank,
		std::vector< bool > &	axisFlag,
		std::vector< unsigned int > &	dimSize,
		std::vector< unsigned int > &	elementNumInner
	)

Definition at line 49 of file ReverseV2Impl.cpp.

 {
     // Get the multidimensional index list for input
     auto inputIdxList = ReverseGetMultIdx(idx, inputRank, elementNumInner);
  
     std::vector<unsigned int> outputIdxList(inputRank);
  
     // Relocate the input index to the output one
     for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
     {
         if (axisFlag[iDim])
         {
             outputIdxList[iDim] = dimSize[iDim] - inputIdxList[iDim] - 1;
         }
         else
         {
             outputIdxList[iDim] = inputIdxList[iDim];
         }
     }
  
     // Get the 1-dimensional flattened index for output
     unsigned int outputIdx = ReverseGetFlatIdx(outputIdxList, inputRank, elementNumInner);
     return outputIdx;
 }

References ReverseGetFlatIdx(), and ReverseGetMultIdx().

Referenced by ReverseV2().

◆ ReverseV2()

void ReverseV2	(	const TensorInfo &	inputInfo,
		const TensorInfo &	axisInfo,
		Decoder< float > &	inputDecoder,
		Decoder< int > &	axisDecoder,
		Encoder< float > &	outputEncoder
	)

Definition at line 78 of file ReverseV2Impl.cpp.

 {
     // Reverses the input tensor along the dimensions read from the axis tensor and writes the
     // result through outputEncoder. With no axes, or an empty input, the input is copied unchanged.
     unsigned int axesRank = static_cast<unsigned int>(axisInfo.GetNumElements());
  
     // Empty axis and empty tensor case: copy input to output
     if ((axesRank == 0) || inputInfo.GetNumElements() == 0)
     {
         for (unsigned idx = 0; idx < inputInfo.GetNumElements(); idx++)
         {
             float inputValue = inputDecoder.Get();
             inputDecoder += 1;
             outputEncoder.Set(inputValue);
             outputEncoder += 1;
         }
         return;
     }
  
     unsigned int inputRank = static_cast<unsigned int>(inputInfo.GetNumDimensions());
  
     // axisFlag[d] is true when dimension d has to be reversed
     std::vector<bool> axisFlag(inputRank, false);
     std::vector<unsigned int> dimSize(inputRank, 0);
     std::vector<int32_t> axis(axesRank, 0);
  
     // Decode the axis information
     for (unsigned int i=0; i < axesRank; i++)
     {
         axis[i] = axisDecoder.Get();
         axisDecoder += 1;
     }
  
     // Make sure the axes are positive: a negative axis counts back from the last dimension
     // NOTE(review): the resulting index is not range-checked before it is used to index axisFlag -
     // confirm that callers validate the axis values against the input rank.
     for (int32_t axisElement: axis)
     {
         axisElement = axisElement < 0 ? axisElement + static_cast<int32_t>(inputRank) : axisElement;
         axisFlag[static_cast<uint32_t>(axisElement)] = true;
     }
  
     const TensorShape &inputShape = inputInfo.GetShape();
  
     unsigned int elementNum = inputInfo.GetNumElements();
     unsigned int baseDimSize = 1;
  
     std::vector<unsigned int> elementNumInner;
  
     // Get the number of element within the specific dimension
     // elementNumInner[d] = elementNum / (product of the sizes of dimensions 0..d)
     for (unsigned int iDim = 0; iDim < inputRank; ++iDim) {
         dimSize[iDim] = inputShape[iDim];
         baseDimSize *= dimSize[iDim];
         elementNumInner.push_back(static_cast<unsigned int>(elementNum / baseDimSize));
     }
  
     // Iterate through all elements
     // The input is read sequentially; each value is written at its relocated output index
     for (unsigned int idx = 0; idx < elementNum; ++idx)
     {
         float inputValue = inputDecoder.Get();
         inputDecoder += 1;
         auto outputIdx = ReverseRelocateIdx(idx, inputRank, axisFlag, dimSize, elementNumInner);
         outputEncoder[outputIdx];
         outputEncoder.Set(inputValue);
     }
 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), ReverseRelocateIdx(), and Encoder< IType >::Set().

◆ RevertConstantWeightsToFP32()

bool armnn::RevertConstantWeightsToFP32 ( Layer * layer )

◆ roundf()

float armnn::roundf ( float value )

inline

Definition at line 43 of file Utils.hpp.

 {
     // Workaround Valgrind's mismatches: when running from Valgrind the call to std::round(4.5) == 4.0 instead of 5.0
     return (value < 0.f) ? ::floorf(value - 0.5f) : ::floorf(value + 0.5f);
 }

Referenced by Resize().

◆ RunClFunction()

void RunClFunction	(	arm_compute::IFunction &	function,
		const CheckLocation &	location
	)

inline

Definition at line 168 of file ClWorkloadUtils.hpp.

 {
     try
     {
         function.run();
     }
     catch (cl::Error& error)
     {
         throw WrapClError(error, location);
     }
 }

References error, and WrapClError().

◆ SelectTensorHandleStrategy()

OptimizationResult SelectTensorHandleStrategy	(	Graph &	optGraph,
		BackendsMap &	backends,
		TensorHandleFactoryRegistry &	registry,
		bool	importEnabled,
		bool	exportEnabled,
		Optional< std::vector< std::string > & >	errMessages
	)

Definition at line 1812 of file Network.cpp.

 {
     // Visits every layer of optGraph and, for each of its output slots, selects the tensor handle factory
     // to use and then an edge strategy for every connection leaving that slot. When no strategy can be
     // found for a connection, result.m_Error is set and, if errMessages holds a value, a message is
     // appended to it. The accumulated result is returned to the caller.
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
     OptimizationResult result;
  
     optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
     {
         ARMNN_ASSERT(layer);
  
         // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
         // assignment if this check fails
         ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
  
         // Check each output separately
         for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
         {
             OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
  
             // Default choice; always overwritten by one of the switch branches below.
             ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
  
             // Calculate the factory to use which results in the fewest copies being made.
             // Input layers take importEnabled into account, all non-Input/Output layers take exportEnabled.
             switch(layer->GetType())
             {
                 case LayerType::Input:
                     slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
                     break;
                 case LayerType::Output:
                     slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                     break;
                 default:
                     slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
                     break;
             }
             outputSlot.SetTensorHandleFactory(slotOption);
  
             // Now determine the "best" edge strategy for each connection given the slotOption.
             // connectionIdx tracks the position of the connection within this output slot.
             unsigned int connectionIdx = 0;
             for (auto&& connection : outputSlot.GetConnections())
             {
                 const Layer& connectedLayer = connection->GetOwningLayer();
  
                 EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
                                                               registry, importEnabled);
  
                 if (strategy == EdgeStrategy::Undefined)
                 {
                     result.m_Error = true;
                     if (errMessages)
                     {
                         errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                          " between backends.");
                     }
                     // NOTE(review): this return only leaves the lambda for the current layer; whether the
                     // remaining layers are still visited is decided by ForEachLayer, which is not visible here.
                     return;
                 }
  
                 outputSlot.SetEdgeStrategy(connectionIdx, strategy);
  
                 connectionIdx++;
             }
         }
     });
  
     return result;
 }

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, CalculateEdgeStrategy(), CalculateSlotOption(), CalculateSlotOptionForInput(), CalculateSlotOptionForOutput(), Graph::ForEachLayer(), Layer::GetBackendId(), OutputSlot::GetConnections(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, OptimizationResult::m_Error, Output, OutputSlot::SetEdgeStrategy(), OutputSlot::SetTensorHandleFactory(), and Undefined.

Referenced by Optimize().

◆ SetAllLoggingSinks()

void SetAllLoggingSinks	(	bool	standardOut,
		bool	debugOut,
		bool	coloured
	)

Definition at line 191 of file Logging.cpp.

 {
     // Applies the same sink selection (standard output, debug output, coloured output) to the logger of
     // every severity level by calling SetLoggingSinks once per level, Trace through Fatal.
     SetLoggingSinks<LogSeverity::Trace>(standardOut, debugOut, coloured);
     SetLoggingSinks<LogSeverity::Debug>(standardOut, debugOut, coloured);
     SetLoggingSinks<LogSeverity::Info>(standardOut, debugOut, coloured);
     SetLoggingSinks<LogSeverity::Warning>(standardOut, debugOut, coloured);
     SetLoggingSinks<LogSeverity::Error>(standardOut, debugOut, coloured);
     SetLoggingSinks<LogSeverity::Fatal>(standardOut, debugOut, coloured);
 }

Referenced by ConfigureLogging(), and TEST_SUITE().

◆ SetClSliceData()

auto SetClSliceData	(	const std::vector< unsigned int > &	m_begin,
		const std::vector< unsigned int > &	m_size
	)

inline

Definition at line 100 of file ClWorkloadUtils.hpp.

 {
     // This function must translate the size vector given to an end vector
     // expected by the ACL NESlice workload
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
  
     unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
  
     // For strided slices, we have the relationship size = (end - begin) / stride
     // For slice, we assume stride to be a vector of all ones, yielding the formula
     // size = (end - begin) therefore we know end = size + begin
     for (unsigned int i = 0; i < num_dims; i++)
     {
         unsigned int revertedIndex = num_dims - i - 1;
  
         starts.set(i, static_cast<int>(m_begin[revertedIndex]));
         ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
     }
  
     return std::make_tuple(starts, ends);
 }

Referenced by ClSliceWorkload::ClSliceWorkload().

◆ SetClStridedSliceData()

auto SetClStridedSliceData	(	const std::vector< int > &	m_begin,
		const std::vector< int > &	m_end,
		const std::vector< int > &	m_stride
	)

inline

Definition at line 79 of file ClWorkloadUtils.hpp.

 {
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
     arm_compute::Coordinates strides;
  
     unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
  
     for (unsigned int i = 0; i < num_dims; i++) {
         unsigned int revertedIndex = num_dims - i - 1;
  
         starts.set(i, static_cast<int>(m_begin[revertedIndex]));
         ends.set(i, static_cast<int>(m_end[revertedIndex]));
         strides.set(i, static_cast<int>(m_stride[revertedIndex]));
     }
  
     return std::make_tuple(starts, ends, strides);
 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload().

◆ SetLogFilter()

void SetLogFilter ( LogSeverity level )

Definition at line 73 of file Logging.cpp.

 {
     // Selects which severities are logged: every logger is disabled first, then the logger for 'level'
     // and each logger listed after it in the switch (up to Fatal) is enabled again.
     SimpleLogger<LogSeverity::Trace>::Get().Enable(false);
     SimpleLogger<LogSeverity::Debug>::Get().Enable(false);
     SimpleLogger<LogSeverity::Info>::Get().Enable(false);
     SimpleLogger<LogSeverity::Warning>::Get().Enable(false);
     SimpleLogger<LogSeverity::Error>::Get().Enable(false);
     SimpleLogger<LogSeverity::Fatal>::Get().Enable(false);
     switch (level)
     {
         // The cases deliberately fall through: execution enters at 'level' and enables every logger from
         // there down to Fatal, where the break ends the cascade.
         case LogSeverity::Trace:
             SimpleLogger<LogSeverity::Trace>::Get().Enable(true);
             ARMNN_FALLTHROUGH;
         case LogSeverity::Debug:
             SimpleLogger<LogSeverity::Debug>::Get().Enable(true);
             ARMNN_FALLTHROUGH;
         case LogSeverity::Info:
             SimpleLogger<LogSeverity::Info>::Get().Enable(true);
             ARMNN_FALLTHROUGH;
         case LogSeverity::Warning:
             SimpleLogger<LogSeverity::Warning>::Get().Enable(true);
             ARMNN_FALLTHROUGH;
         case LogSeverity::Error:
             SimpleLogger<LogSeverity::Error>::Get().Enable(true);
             ARMNN_FALLTHROUGH;
         case LogSeverity::Fatal:
             SimpleLogger<LogSeverity::Fatal>::Get().Enable(true);
             break;
         default:
             // Unknown severity: nothing is re-enabled, so all loggers stay disabled.
             ARMNN_ASSERT(false);
     }
 }

References ARMNN_ASSERT, ARMNN_FALLTHROUGH, Debug, SimpleLogger< Level >::Enable(), Error, Fatal, SimpleLogger< Level >::Get(), Info, Trace, and Warning.

Referenced by ConfigureLogging(), and TEST_SUITE().

◆ SetLoggingSinks()

void armnn::SetLoggingSinks	(	bool	standardOut,
		bool	debugOut,
		bool	coloured
	)

inline

Definition at line 167 of file Logging.cpp.

 {
     SimpleLogger<Level>::Get().RemoveAllSinks();
  
     if (standardOut)
     {
         if (coloured)
         {
             SimpleLogger<Level>::Get().AddSink(
                 std::make_shared<StandardOutputColourSink>(Level));
         } else
         {
             SimpleLogger<Level>::Get().AddSink(
                 std::make_shared<StandardOutputSink>());
         }
     }
  
     if (debugOut)
     {
         SimpleLogger<Level>::Get().AddSink(
             std::make_shared<DebugOutputSink>());
     }
 }

References SimpleLogger< Level >::AddSink(), SimpleLogger< Level >::Get(), and SimpleLogger< Level >::RemoveAllSinks().

◆ SetNeonSliceData()

auto armnn::SetNeonSliceData	(	const std::vector< unsigned int > &	m_begin,
		const std::vector< unsigned int > &	m_size
	)

inline

Definition at line 161 of file NeonWorkloadUtils.hpp.

 {
     // This function must translate the size vector given to an end vector
     // expected by the ACL NESlice workload
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
  
     unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
  
     // For strided slices, we have the relationship size = (end - begin) / stride
     // For slice, we assume stride to be a vector of all ones, yielding the formula
     // size = (end - begin) therefore we know end = size + begin
     for (unsigned int i = 0; i < num_dims; i++)
     {
         unsigned int revertedIndex = num_dims - i - 1;
  
         starts.set(i, static_cast<int>(m_begin[revertedIndex]));
         ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
     }
  
     return std::make_tuple(starts, ends);
 }

Referenced by NeonSliceWorkload::NeonSliceWorkload().

◆ SetNeonStridedSliceData()

auto armnn::SetNeonStridedSliceData	(	const std::vector< int > &	m_begin,
		const std::vector< int > &	m_end,
		const std::vector< int > &	m_stride
	)

inline

Definition at line 139 of file NeonWorkloadUtils.hpp.

 {
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
     arm_compute::Coordinates strides;
  
     unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
  
     for (unsigned int i = 0; i < num_dims; i++)
     {
         unsigned int revertedIndex = num_dims - i - 1;
  
         starts.set(i, static_cast<int>(m_begin[revertedIndex]));
         ends.set(i, static_cast<int>(m_end[revertedIndex]));
         strides.set(i, static_cast<int>(m_stride[revertedIndex]));
     }
  
     return std::make_tuple(starts, ends, strides);
 }

Referenced by NeonStridedSliceWorkload::NeonStridedSliceWorkload().

◆ SetValueChecked()

void armnn::SetValueChecked	(	Optional< T & >	optionalRef,
		V &&	val
	)

Definition at line 17 of file LayerSupportCommon.hpp.

 {
     if (optionalRef)
     {
         optionalRef.value() = val;
     }
 }

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by FalseFuncF16(), FalseFuncF32(), FalseFuncI32(), FalseFuncU8(), FalseInputFuncF16(), FalseInputFuncF32(), FalseOutputFuncF16(), FalseOutputFuncF32(), NeonLayerSupport::IsConcatSupported(), ClLayerSupport::IsConcatSupported(), ClLayerSupport::IsSplitterSupported(), and NeonLayerSupport::IsSplitterSupported().

◆ Slice()

void Slice	(	const TensorInfo &	inputInfo,
		const SliceDescriptor &	descriptor,
		const void *	inputData,
		void *	outputData,
		unsigned int	dataTypeSize
	)

Definition at line 14 of file Slice.cpp.

 {
     const TensorShape& inputShape = inputInfo.GetShape();
     const unsigned int numDims    = inputShape.GetNumDimensions();
  
     constexpr unsigned int maxNumDims = 4;
     if (descriptor.m_Begin.size() != numDims)
     {
         std::stringstream msg;
         msg << "Slice: Number of dimensions (" << numDims <<
             ") does not match the Begin vector in the descriptor (" << descriptor.m_Begin.size() << ")";
         throw InvalidArgumentException(msg.str());
     }
     if (descriptor.m_Size.size() != numDims)
     {
         std::stringstream msg;
         msg << "Slice: Number of dimensions (" << numDims <<
             ") does not match the Size vector in the descriptor (" << descriptor.m_Size.size() << ")";
         throw InvalidArgumentException(msg.str());
     }
     if (numDims > maxNumDims)
     {
         std::stringstream msg;
         msg << "Slice: Number of dimensions (" << numDims <<
             ") is greater than the maximum supported (" << maxNumDims << ")";
         throw InvalidArgumentException(msg.str());
     }
  
     std::vector<unsigned int> paddedInput(4);
     std::vector<unsigned int> paddedBegin(4);
     std::vector<unsigned int> paddedSize (4);
  
     const unsigned int numPaddingDims = maxNumDims - numDims;
     for (unsigned int i = 0u; i < maxNumDims; ++i)
     {
         if (i < numPaddingDims)
         {
             paddedInput[i] = 1u;
             paddedBegin[i] = 0u;
             paddedSize[i]  = 1u;
         }
         else
         {
             const unsigned int j = i - numPaddingDims;
             paddedInput[i] = inputShape[j];
             paddedBegin[i] = descriptor.m_Begin[j];
             paddedSize[i]  = descriptor.m_Size[j];
         }
     }
  
     unsigned int dim0 = paddedInput[0];
     unsigned int dim1 = paddedInput[1];
     unsigned int dim2 = paddedInput[2];
     unsigned int dim3 = paddedInput[3];
  
     unsigned int begin0 = paddedBegin[0];
     unsigned int begin1 = paddedBegin[1];
     unsigned int begin2 = paddedBegin[2];
     unsigned int begin3 = paddedBegin[3];
  
     unsigned int size0  = paddedSize[0];
     unsigned int size1  = paddedSize[1];
     unsigned int size2  = paddedSize[2];
     unsigned int size3  = paddedSize[3];
  
     if (begin0 + size0 > dim0)
     {
         std::stringstream msg;
         msg << "Slice: begin0 + size0 (" << (begin0 + size0) <<
             ") exceeds dim0 (" << dim0 << ")";
         throw InvalidArgumentException(msg.str());
     }
     if (begin1 + size1 > dim1)
     {
         std::stringstream msg;
         msg << "Slice: begin1 + size1 (" << (begin1 + size1) <<
             ") exceeds dim2 (" << dim1 << ")";
         throw InvalidArgumentException(msg.str());
     }
     if (begin2 + size2 > dim2)
     {
         std::stringstream msg;
         msg << "Slice: begin2 + size2 (" << (begin2 + size2) <<
             ") exceeds dim2 (" << dim2 << ")";
         throw InvalidArgumentException(msg.str());
     }
     if (begin3 + size3 > dim3)
     {
         std::stringstream msg;
         msg << "Slice: begin3 + size3 (" << (begin3 + size3) <<
             ") exceeds dim3 (" << dim3 << ")";
         throw InvalidArgumentException(msg.str());
     }
  
     if (inputData == nullptr)
     {
         throw armnn::NullPointerException("Slice: Null inputData pointer");
     }
     if (outputData == nullptr)
     {
         throw armnn::NullPointerException("Slice: Null outputData pointer");
     }
  
     const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
     unsigned char* output      = reinterpret_cast<unsigned char*>(outputData);
  
     for (unsigned int idx0 = begin0; idx0 < begin0 + size0; ++idx0)
     {
         for (unsigned int idx1 = begin1; idx1 < begin1 + size1; ++idx1)
         {
             for (unsigned int idx2 = begin2; idx2 < begin2 + size2; ++idx2)
             {
                 for (unsigned int idx3 = begin3; idx3 < begin3 + size3; ++idx3)
                 {
                     const unsigned int inputOffset =
                         (((idx0 * dim1 + idx1) * dim2 + idx2) * dim3 + idx3) * dataTypeSize;
  
                     ::memcpy(output, input + inputOffset, dataTypeSize);
                     output += dataTypeSize;
                 }
             }
         }
     }
 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), SliceDescriptor::m_Begin, and SliceDescriptor::m_Size.

◆ Softmax()

void Softmax	(	Decoder< float > &	in,
		Encoder< float > &	out,
		const TensorInfo &	inputTensorInfo,
		float	beta,
		int	axis
	)

Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.

Definition at line 17 of file Softmax.cpp.

 {
     // For every position outside the chosen axis, writes
     //     out = exp((x - max) * beta) / sum(exp((x - max) * beta))
     // where max and the sum are taken over the axisSize elements along 'axis'.
     // A negative axis counts from the last dimension.
     ARMNN_ASSERT_MSG(axis < static_cast<int>(inputTensorInfo.GetNumDimensions()),
                      "Required axis index greater than number of dimensions.");
     ARMNN_ASSERT_MSG(axis >= -static_cast<int>(inputTensorInfo.GetNumDimensions()),
                      "Required axis index lower than negative of the number of dimensions");
  
     // Convert a negative axis into the equivalent non-negative index (numDimensions - |axis|).
     unsigned int uAxis = axis < 0  ?
                          inputTensorInfo.GetNumDimensions() - static_cast<unsigned int>(abs(axis))
                          : static_cast<unsigned int>(axis);
  
     // outerSize / innerSize: presumably the element counts of the dimensions before / after uAxis
     // (as returned by GetNumElementsBetween) - TODO confirm against armnnUtils.
     const TensorShape& inputShape = inputTensorInfo.GetShape();
     const unsigned int outerSize  = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
     const unsigned int axisSize   = inputShape[uAxis];
     const unsigned int innerSize  = armnnUtils::GetNumElementsBetween(inputShape,
                                                                       uAxis + 1,
                                                                       inputShape.GetNumDimensions());
  
     for (unsigned int outer = 0; outer < outerSize; ++outer)
     {
         // Flat index range covering all axisSize * innerSize elements of this outer position.
         unsigned int inputBeginIdx  = outer * axisSize * innerSize;
         unsigned int inputEndIdx    = inputBeginIdx + axisSize * innerSize;
         unsigned int outputBeginIdx = outer * axisSize * innerSize;
  
         // Each inner position shifts the window by one; elements along the axis are innerSize apart.
         for (unsigned int inner = 0; inner < innerSize; ++inner, ++inputBeginIdx, ++inputEndIdx, ++outputBeginIdx)
         {
             // Find max
             float maxValue = std::numeric_limits<float>::lowest();
             for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
             {
                 // The result of operator[] is discarded; presumably it positions the decoder so that
                 // Get() reads element 'iter'.
                 in[iter];
                 maxValue = std::max(maxValue, in.Get());
             }
  
             // Compute sum
             // The maximum is subtracted before exp(), so the largest value contributes exp(0).
             float sum = 0.0f;
             for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
             {
                 in[iter];
                 sum += std::exp((in.Get() - maxValue) * beta);
             }
  
             // Compute result
             unsigned int outputIter = outputBeginIdx;
             // This positioning is repeated at the top of the first loop iteration below.
             out[outputIter];
             for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize, outputIter += innerSize)
             {
                 out[outputIter];
                 in[iter];
                 out.Set(std::exp((in.Get() - maxValue) * beta) / sum);
             }
         }
     }
 }

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), and Encoder< IType >::Set().

◆ SpaceToBatchNd()

void SpaceToBatchNd	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const SpaceToBatchNdDescriptor &	params,
		Decoder< float > &	inputData,
		Encoder< float > &	outputData
	)

Definition at line 48 of file SpaceToBatchNd.cpp.

 {
     // Fills the output tensor element by element: each output position either receives the input
     // element it maps to, or 0 when the mapped position lies in the padding around the input.
     // Only rank 3 and rank 4 inputs are accepted; anything else throws InvalidArgumentException.
     // For rank 3 the width is treated as 1 (block width 1, no left padding).
     unsigned int rank = inputInfo.GetNumDimensions();
     if (rank != 3 && rank != 4 )
     {
         throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
                                        CHECK_LOCATION());
     }
  
     DataLayoutIndexed dataLayout = params.m_DataLayout;
     // Channel index used for rank 3 inputs: 1 for NCHW, 2 otherwise.
     unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
  
     const TensorShape& inputShape = inputInfo.GetShape();
     const TensorShape& outputShape = outputInfo.GetShape();
  
     const unsigned int inputBatchSize  = inputShape[0];
     const unsigned int outputBatchSize = outputShape[0];
  
     const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
                                               : inputShape[dataLayout.GetChannelsIndex()];
  
     const unsigned int inputHeight  = inputShape[dataLayout.GetHeightIndex()];
     const unsigned int inputWidth   = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
     const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
     const unsigned int outputWidth  = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
  
     const unsigned int blockHeight = params.m_BlockShape[0];
     const unsigned int blockWidth  = (rank == 3) ? 1 : params.m_BlockShape[1];
  
     // Only the leading padding (.first) of each pad entry is read here.
     const unsigned int paddingTop  = params.m_PadList[0].first;
     const unsigned int paddingLeft = (rank == 3) ? 0 : params.m_PadList[1].first;
  
     for (unsigned int outB = 0; outB < outputBatchSize; ++outB)
     {
         // The output batch index encodes the source batch and the position inside the block.
         unsigned int inB = outB % inputBatchSize;
  
         unsigned int shiftW = (outB / inputBatchSize) % blockWidth;
         unsigned int shiftH = (outB / inputBatchSize) / blockWidth;
  
         for (unsigned int outH = 0; outH < outputHeight; ++outH)
         {
             for (unsigned int outW = 0; outW < outputWidth; ++outW)
             {
                 // Position in the padded input is (outH * blockHeight + shiftH, outW * blockWidth + shiftW);
                 // if it lies before the top/left padding or beyond the input extent, the output is zero.
                 if (outH * blockHeight + shiftH < paddingTop ||
                     outH * blockHeight + shiftH >= paddingTop + inputHeight ||
                     outW * blockWidth + shiftW < paddingLeft ||
                     outW * blockWidth + shiftW >= paddingLeft + inputWidth)
                 {
                     for (unsigned int c = 0; c < channels; c++)
                     {
                         unsigned int outOffset = GetOffset(outputShape,
                                                            outB,
                                                            outH,
                                                            outW,
                                                            c,
                                                            dataLayout);
                         // Advance the encoder to the element, write, then step back by the same amount.
                         outputData += outOffset;
                         outputData.Set(0);
                         outputData -= outOffset;
                     }
                 }
                 else
                 {
                     for (unsigned int c = 0; c < channels; c++)
                     {
                         // Subtracting the padding converts the padded position into input coordinates.
                         unsigned int inOffset = GetOffset(inputShape,
                                                           inB,
                                                           (outH * blockHeight + shiftH) - paddingTop,
                                                           (outW * blockWidth + shiftW) - paddingLeft,
                                                           c,
                                                           dataLayout);
  
                         unsigned int outOffset = GetOffset(outputShape,
                                                            outB,
                                                            outH,
                                                            outW,
                                                            c,
                                                            dataLayout);
  
                         // Advance both iterators, copy one value, then undo both advances.
                         outputData += outOffset;
                         inputData += inOffset;
                         outputData.Set(inputData.Get());
                         inputData -= inOffset;
                         outputData -= outOffset;
                     }
                 }
             }
         }
     }
 }

References CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToBatchNdDescriptor::m_BlockShape, SpaceToBatchNdDescriptor::m_DataLayout, SpaceToBatchNdDescriptor::m_PadList, Encoder< IType >::Set(), and SpaceToBatchNd().

Referenced by SpaceToBatchNd(), and SpaceToBatchNdLayer::SpaceToBatchNdLayer().

◆ SpaceToDepth()

void SpaceToDepth	(	const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo,
		const SpaceToDepthDescriptor &	params,
		Decoder< float > &	inputData,
		Encoder< float > &	outputData
	)

Definition at line 36 of file SpaceToDepth.cpp.

 {
     DataLayoutIndexed dataLayout = params.m_DataLayout;
  
     const TensorShape& inputShape = inputInfo.GetShape();
     const TensorShape& outputShape = outputInfo.GetShape();
  
     const unsigned int inputBatchSize = inputShape[0];
     const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
  
     const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
     const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
     const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
  
     const unsigned int blockSize = params.m_BlockSize;
  
     if (blockSize == 0)
     {
         throw InvalidArgumentException(
             "Input shape must be divisible by block size in all spatial dimensions: Block size is"
             " equal to zero");
     }
  
     for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
     {
         unsigned int inChannelIndex = outChannelIndex % inputChannels;
  
         unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
         unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
  
         for (unsigned int outH = 0; outH < outputHeight; outH++)
         {
             for (unsigned int outW = 0; outW < outputWidth; outW++)
             {
                 for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
                 {
                     unsigned int inOffset = GetOffset(inputShape,
                         inChannelIndex,
                         (outH * blockSize + shiftH),
                         (outW * blockSize + shiftW),
                         inBatchIndex,
                         dataLayout);
  
                     unsigned int outOffset = GetOffset(outputShape,
                         outChannelIndex,
                         outH,
                         outW,
                         inBatchIndex,
                         dataLayout);
  
                     outputData += outOffset;
                     inputData += inOffset;
                     outputData.Set(inputData.Get());
                     inputData -= inOffset;
                     outputData -= outOffset;
                 }
             }
         }
     }
 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, Encoder< IType >::Set(), and SpaceToDepth().

Referenced by SpaceToDepth(), and SpaceToDepthLayer::SpaceToDepthLayer().

◆ Split()

void Split	(	const SplitterQueueDescriptor &	data,
		std::vector< ITensorHandle * >	inputs,
		std::vector< ITensorHandle * >	outputs
	)

Definition at line 21 of file Splitter.cpp.

 {
     // Distributes the elements of inputs[0] over the output tensors: data.m_ViewOrigins[v] gives the
     // origin of outputs[v] inside the input and the shape of outputs[v] gives the extent of that view.
     // Every input element is written to the first view that contains it.
     const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
  
     std::unique_ptr<Decoder<float>> decoderPtr =
         MakeDecoder<float>(inputInfo, inputs[0]->Map());
     Decoder<float>& decoder = *decoderPtr;
  
     for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
     {
         // Per-dimension coordinates of the flat input index.
         unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
  
         unsigned int indexRemainder = index;
         unsigned int dimensionStride = inputInfo.GetNumElements();
  
         // Peel off one dimension at a time, outermost first.
         for (unsigned int i = 0; i<inputInfo.GetNumDimensions(); i++)
         {
             dimensionStride /= inputInfo.GetShape()[i];
             indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
             indexRemainder -= indices[i] * dimensionStride;
         }
  
         for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
         {
             SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
  
             //Split view extents are defined by the size of (the corresponding) input tensor.
             const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
             ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
  
             // Check all dimensions to see if this element is inside the given input view.
             bool insideView = true;
             for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
             {
                 if (indices[i] < view.m_Origin[i])
                 {
                     insideView = false;
                 }
                 if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
                 {
                     insideView = false;
                 }
             }
  
             if (insideView)
             {
                 // A new encoder is created for every element that is written.
                 std::unique_ptr<Encoder<float>> encoderPtr =
                     MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
                 Encoder<float>& encoder = *encoderPtr;
  
                 unsigned int outIndex = 0;
                 unsigned int dimensionStride = 1;
                 float inputValue = 0.f;
  
                 // Flat index inside the output tensor, built from the innermost dimension outwards using
                 // coordinates relative to the view origin.
                 for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
                 {
                     outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
                     dimensionStride *= outputInfo.GetShape()[i];
                 }
  
                 // Advance the shared decoder to the element, read it, then step back by the same amount.
                 decoder += index;
                 inputValue = decoder.Get();
                 decoder -= index;
  
                 encoder += outIndex;
                 encoder.Set(inputValue);
                 // Stop at the first view containing the element.
                 break;
             }
         }
     }
 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().

◆ Splitter()

void armnn::Splitter	(	const SplitterQueueDescriptor &	data,
		std::vector< ITensorHandle * >	inputs,
		std::vector< ITensorHandle * >	outputs
	)

Definition at line 17 of file Splitter.hpp.

 {
     const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
  
     for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
     {
         unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
  
         unsigned int indexRemainder = index;
         unsigned int dimensionStride = inputInfo0.GetNumElements();
  
         for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
         {
             dimensionStride /= inputInfo0.GetShape()[i];
             indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
             indexRemainder -= indices[i] * dimensionStride;
         }
  
         for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
         {
             SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
  
             //Split view extents are defined by the size of (the corresponding) input tensor.
             const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
             ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
  
             // Check all dimensions to see if this element is inside the given input view.
             bool insideView = true;
             for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
             {
                 if (indices[i] < view.m_Origin[i])
                 {
                     insideView = false;
                 }
                 if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
                 {
                     insideView = false;
                 }
             }
  
             if (insideView)
             {
                 unsigned int outIndex = 0;
                 unsigned int dimensionStride = 1;
  
                 for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
                 {
                     outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
                     dimensionStride *= outputInfo.GetShape()[i];
                 }
  
                 //We are within the view, to copy input data to the output corresponding to this view.
                 DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
                 ARMNN_ASSERT(outputData);
  
                 const DataType* inputData = GetInputTensorData<DataType>(0, data);
                 ARMNN_ASSERT(inputData);
  
                 outputData[outIndex] = inputData[index];
             }
         }
     }
 }

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

◆ Stack()

void Stack	(	const StackQueueDescriptor &	data,
		std::vector< std::unique_ptr< Decoder< float >>> &	inputs,
		Encoder< float > &	output,
		const TensorInfo &	inputInfo,
		const TensorInfo &	outputInfo
	)

Definition at line 12 of file Stack.cpp.

 {
     // Stacks the input tensors along data.m_Parameters.m_Axis: values are read from the decoders in
     // `inputs` and written through the `output` encoder.
     unsigned int outputNumDims = outputInfo.GetNumDimensions();
     unsigned int inputNumDims = inputInfo.GetNumDimensions();

     const armnn::TensorShape& outputDims = outputInfo.GetShape();
     const armnn::TensorShape& inputDims = inputInfo.GetShape();

     unsigned int axis = data.m_Parameters.m_Axis;

     // Can perform a simple concatenation when axis == 0
     if (!axis)
     {
         // NOTE(review): this path takes the input count from m_Parameters.m_NumInputs while the general
         // path below uses data.m_Inputs.size(); presumably the two always agree - confirm.
         unsigned int numInputs = data.m_Parameters.m_NumInputs;
         unsigned int inputLength = inputInfo.GetNumElements();

         for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
         {
             for (unsigned int elmt=0; elmt<inputLength; ++elmt)
             {
                 // The operator[] results are discarded: the calls are made for their side effect only,
                 // presumably positioning the decoder/encoder on that element before Get()/Set()
                 // (confirm against Decoder/Encoder).
                 (*inputs[inputIdx])[elmt];
                 output[(inputIdx * inputLength) + elmt];
                 output.Set(inputs[inputIdx]->Get());
             }
         }
         return;
     }

     // Input extents; dimensions the input does not have are treated as having size 1.
     const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
     const unsigned int iBatchSize  = inputDims[0];
     const unsigned int iChannels   = (inputNumDims > 1) ? inputDims[1] : 1;
     const unsigned int iHeight     = (inputNumDims > 2) ? inputDims[2] : 1;
     const unsigned int iWidth      = (inputNumDims > 3) ? inputDims[3] : 1;

     // Output extents for dimensions 1..4. outputDims[0] is not read because the flattening formula
     // in the innermost loop never needs the outermost extent.
     const unsigned int oBatchSize  = outputDims[1];
     const unsigned int oChannels   = (outputNumDims > 2) ? outputDims[2] : 1;
     const unsigned int oHeight     = (outputNumDims > 3) ? outputDims[3] : 1;
     const unsigned int oWidth      = (outputNumDims > 4) ? outputDims[4] : 1;

     // Array to store the input coordinates
     // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
     // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
     // iCoordinates[5] will be always zero and used for not incrementing
     // the output when the input has less than 4 dimensions
     std::array<unsigned int, 6> iCoordinates{ 0 };

     // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
     // This array is initialized with &iCoordinates[5] since this will be always zero
     std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
                                                    &iCoordinates[5],
                                                    &iCoordinates[5],
                                                    &iCoordinates[5],
                                                    &iCoordinates[5] };

     // Set the axis coordinate
     oCoordinates[axis] = &iCoordinates[0];

     // Map the output coordinates, accounting for the axis
     // Input dimensions at or after the axis are shifted one slot to the right in the output.
     // NOTE(review): oCoordinates has 5 slots, so dim + dim_shift (and axis above) must stay below 5;
     // that holds only while inputNumDims <= 4 - presumably guaranteed by validation elsewhere.
     unsigned int dim_shift = 0;
     for(unsigned int dim = 0; dim < inputNumDims; ++dim)
     {
         if(dim == axis)
         {
             dim_shift++;
         }
         oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
     }

     // Alias for the input coordinates
     unsigned int &i  = iCoordinates[0];
     unsigned int &bi = iCoordinates[1];
     unsigned int &ci = iCoordinates[2];
     unsigned int &hi = iCoordinates[3];
     unsigned int &wi = iCoordinates[4];

     // Alias for the output coordinates
     // Each of these refers to one of the iCoordinates entries, so advancing the input loop counters
     // below updates the output coordinates as well.
     unsigned int &o  = *(oCoordinates[0]);
     unsigned int &bo = *(oCoordinates[1]);
     unsigned int &co = *(oCoordinates[2]);
     unsigned int &ho = *(oCoordinates[3]);
     unsigned int &wo = *(oCoordinates[4]);

     // Stack tensors
     // i needs no explicit initialisation: iCoordinates was zero-initialised above.
     for(; i < iNumTensors; ++(i))
     {
         for(bi = 0; bi < iBatchSize; ++(bi))
         {
             for(ci = 0; ci < iChannels; ++(ci))
             {
                 for(hi = 0; hi < iHeight; ++(hi))
                 {
                     for(wi = 0; wi < iWidth; ++(wi))
                     {
                         // Flattened output offset; operator[] is called for its side effect only
                         // (presumably it positions the encoder for the Set() below).
                         output[o  * oWidth * oHeight * oChannels * oBatchSize +
                                bo * oWidth * oHeight * oChannels +
                                co * oWidth * oHeight +
                                ho * oWidth +
                                wo];

                         output.Set(inputs[i]->Get());

                         // Presumably advances the decoder of input i to its next element, so each
                         // input is consumed sequentially.
                         ++(*(inputs[i]));
                     }
                 }
             }
         }
     }
 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), StackDescriptor::m_Axis, QueueDescriptor::m_Inputs, StackDescriptor::m_NumInputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

◆ StrEqual()

constexpr bool armnn::StrEqual	(	const char *	strA,
		const char(&)	strB[N]
	)

constexpr

Definition at line 201 of file TypesUtils.hpp.

 {
     bool isEqual = true;
     for (unsigned i = 0; isEqual && (i < N); ++i)
     {
         isEqual = (strA[i] == strB[i]);
     }
     return isEqual;
 }

Referenced by ParseComputeDevice().

◆ StridedSlice()

void StridedSlice	(	const TensorInfo &	inputInfo,
		const StridedSliceDescriptor &	params,
		const void *	inputData,
		void *	outputData,
		unsigned int	dataTypeSize
	)

Definition at line 90 of file StridedSlice.cpp.

 {
     if (inputData == nullptr)
     {
         throw armnn::InvalidArgumentException("Slice: Null inputData pointer");
     }
     if (outputData == nullptr)
     {
         throw armnn::InvalidArgumentException("Slice: Null outputData pointer");
     }
  
     const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
     unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
  
     const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
  
     StridedSliceDescriptor paddedParams = params;
  
     // Pad parameters to 4 dimensions
     PadParams(paddedParams, 4);
  
     const int start0 = paddedParams.GetStartForAxis(inputShape, 0);
     const int stop0  = paddedParams.GetStopForAxis (inputShape, 0, start0);
  
     const int start1 = paddedParams.GetStartForAxis(inputShape, 1);
     const int stop1  = paddedParams.GetStopForAxis (inputShape, 1, start1);
  
     const int start2 = paddedParams.GetStartForAxis(inputShape, 2);
     const int stop2  = paddedParams.GetStopForAxis (inputShape, 2, start2);
  
     const int start3 = paddedParams.GetStartForAxis(inputShape, 3);
     const int stop3  = paddedParams.GetStopForAxis (inputShape, 3, start3);
  
     const int step = armnn::numeric_cast<int>(dataTypeSize);
  
     for (int in0 = start0;
          !LoopCondition(in0, stop0, paddedParams.m_Stride[0]);
          in0 += paddedParams.m_Stride[0])
     {
         for (int in1 = start1;
              !LoopCondition(in1, stop1, paddedParams.m_Stride[1]);
              in1 += paddedParams.m_Stride[1])
         {
             for (int in2 = start2;
                  !LoopCondition(in2, stop2, paddedParams.m_Stride[2]);
                  in2 += paddedParams.m_Stride[2])
             {
                 for (int in3 = start3;
                      !LoopCondition(in3, stop3, paddedParams.m_Stride[3]);
                      in3 += paddedParams.m_Stride[3])
                 {
                     int dim1 = armnn::numeric_cast<int>(inputShape[1]);
                     int dim2 = armnn::numeric_cast<int>(inputShape[2]);
                     int dim3 = armnn::numeric_cast<int>(inputShape[3]);
  
                     int inputOffset = (((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3) * step;
                     ::memcpy(output, input + inputOffset, dataTypeSize);
                     output += step;
                 }
             }
         }
     }
 }

References TensorInfo::GetShape(), StridedSliceDescriptor::GetStartForAxis(), StridedSliceDescriptor::GetStopForAxis(), and StridedSliceDescriptor::m_Stride.

◆ StringToLogLevel()

LogSeverity armnn::StringToLogLevel ( std::string level )

inline

Definition at line 43 of file Logging.hpp.

 {
     // Transfer to lower case
     std::transform(level.begin(), level.end(), level.begin(),
                    [](unsigned char c){ return std::tolower(c); }
     );
  
     if (level == "trace")
     {
         return LogSeverity::Trace;
     }
     else if (level == "debug")
     {
         return LogSeverity::Debug;
     }
     else if (level == "info")
     {
         return LogSeverity::Info;
     }
     else if (level == "warning")
     {
         return LogSeverity::Warning;
     }
     else if (level == "error")
     {
         return LogSeverity::Error;
     }
     else if (level == "fatal")
     {
         return LogSeverity::Fatal;
     }
     else
     {
         throw armnn::Exception("Unknown severity level for logging: '" + level +
                                "'. Valid options: trace, debug, info, warning, error, fatal");
     }
 }

References Debug, Error, Fatal, Info, Trace, and Warning.

◆ swap() [1/2]

void armnn::swap	(	OriginsDescriptor &	first,
		OriginsDescriptor &	second
	)

Definition at line 356 of file Descriptors.cpp.

 {
     using std::swap;
     swap(first.m_NumViews, second.m_NumViews);
     swap(first.m_NumDimensions, second.m_NumDimensions);
     swap(first.m_ViewOrigins, second.m_ViewOrigins);
     swap(first.m_ConcatAxis, second.m_ConcatAxis);
 }

References swap().

Referenced by BackendId::operator=(), SquashEqualSiblingsImpl< Comparable >::Run(), BackendRegistry::Swap(), and swap().

◆ swap() [2/2]

void armnn::swap	(	ViewsDescriptor &	first,
		ViewsDescriptor &	second
	)

Definition at line 365 of file Descriptors.cpp.

 {
     using std::swap;
     swap(first.m_Origins, second.m_Origins);
     swap(first.m_ViewSizes, second.m_ViewSizes);
     swap(first.m_IsAxisSet, second.m_IsAxisSet);
     swap(first.m_Axis, second.m_Axis);
 }

References swap().

Referenced by swap().

◆ Tile()

void Tile	(	const TileDescriptor &	params,
		const TensorInfo &	inputInfo,
		Decoder< float > &	inputDecoder,
		Encoder< float > &	outputEncoder
	)

Definition at line 45 of file Tile.cpp.

 {
     // Input and output will always have same rank
     uint32_t rank = inputInfo.GetNumDimensions();
  
     TensorShape inputShape = inputInfo.GetShape();
  
     std::vector<uint32_t> outputShape(rank);
     for (uint32_t i = 0; i < rank; ++i)
     {
         outputShape[i] = inputShape[i] * params.m_Multiples[i];
     }
  
     // If all values of multiples are 1, then return the input
     if ( std::adjacent_find( params.m_Multiples.begin(), params.m_Multiples.end(),
                              std::not_equal_to<>() ) == params.m_Multiples.end() && params.m_Multiples[0] == 1)
     {
         for (uint32_t idx = 0; idx < inputInfo.GetNumElements(); ++idx)
         {
             float inputValue = inputDecoder.Get();
             ++inputDecoder;
             outputEncoder.Set(inputValue);
             ++outputEncoder;
         }
         return;
     }
  
     std::vector<float> inputData = inputDecoder.DecodeTensor(inputInfo.GetShape());
     std::vector<float> outputData;
     auto outputNumElements = inputData.size() * static_cast<uint32_t>(std::accumulate(begin(params.m_Multiples),
                                                                                       end(params.m_Multiples),
                                                                                       1,
                                                                                       std::multiplies<>()));
     outputData.reserve(outputNumElements);
  
     for (uint32_t outputIndex = 0; outputIndex < outputNumElements; ++outputIndex)
     {
         std::vector<uint32_t> outputCoords = IndexToCoordinates(outputShape, outputIndex);
  
         // Converting output coordinates to input coordinates using modulus
         std::vector<uint32_t> inputCoordinates;
         inputCoordinates.reserve(rank);
         for (uint32_t i = 0; i < rank; ++i)
         {
             inputCoordinates.push_back(outputCoords[i] % inputShape[i]);
         }
  
         uint32_t inputIndex = CoordinatesToIndex(inputShape, inputCoordinates);
  
         outputEncoder[outputIndex];
         outputEncoder.Set(inputData[inputIndex]);
     }
 }

References CoordinatesToIndex(), Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IndexToCoordinates(), TileDescriptor::m_Multiples, and Encoder< IType >::Set().

◆ TopKSort()

void TopKSort	(	unsigned int	k,
		unsigned int *	indices,
		const float *	values,
		unsigned int	numElement
	)

Definition at line 25 of file DetectionPostProcess.cpp.

 {
     std::partial_sort(indices, indices + k, indices + numElement,
                       [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().

◆ TosaRefBackendId()

constexpr const char* armnn::TosaRefBackendId ( )

constexpr

Definition at line 10 of file TosaRefBackendId.hpp.

10 { return "TosaRef"; } // Always yields the fixed string literal "TosaRef".

Referenced by TosaRefBackend::GetIdStatic().

◆ TosaRefPreCompiledWorkloadValidate()

bool TosaRefPreCompiledWorkloadValidate ( std::string * )

Definition at line 166 of file TosaRefPreCompiledWorkload.cpp.

 {
     // Unconditionally reports success; the (unnamed) std::string* argument is never written to.
     return true;
 }

◆ TosaRefTensorHandleFactoryId()

constexpr const char* armnn::TosaRefTensorHandleFactoryId ( )

constexpr

Definition at line 15 of file TosaRefTensorHandleFactory.hpp.

15 { return "Arm/TosaRef/TensorHandleFactory"; } // Always yields this fixed string literal.

Referenced by TosaRefTensorHandleFactory::GetIdStatic().

◆ TransposeConvolution2dImpl()

void TransposeConvolution2dImpl	(	const TransposeConvolution2dDescriptor &	descriptor,
		const TensorShape &	inputShape,
		Decoder< float > &	inputDecoder,
		const TensorShape &	outputShape,
		Encoder< float > &	outputEncoder,
		const TensorShape &	weightsShape,
		Decoder< float > &	weightsDecoder,
		Decoder< float > *	biasesDecoder
	)

Definition at line 15 of file TransposeConvolution2d.cpp.

 {
     // Reference transpose convolution: every input element is multiplied by every weight and the
     // product is scattered into an intermediate float buffer, which is then (optionally) biased and
     // written out through outputEncoder.
     //
     // Throws InvalidArgumentException when the descriptor enables biases but biasesDecoder is null.
     if (descriptor.m_BiasEnabled && !biasesDecoder)
     {
         throw InvalidArgumentException("Biases enabled but no bias data provided");
     }
     // Positions of the channel/height/width dimensions for the descriptor's data layout.
     const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
     const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
     const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
     const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();

     const unsigned int numBatches = inputShape[0];

     const unsigned int inputWidth  = inputShape[widthIndex];
     const unsigned int inputHeight = inputShape[heightIndex];
     const unsigned int inputDepth  = inputShape[channelsIndex];

     // The weights shape is indexed with the same layout indices as the input and output shapes.
     const unsigned int weightsHeight = weightsShape[heightIndex];
     const unsigned int weightsWidth  = weightsShape[widthIndex];
     const unsigned int weightsDepth  = weightsShape[channelsIndex];

     const unsigned int outputHeight = outputShape[heightIndex];
     const unsigned int outputWidth  = outputShape[widthIndex];
     const unsigned int outputDepth  = outputShape[channelsIndex];

     const unsigned int paddingLeft = descriptor.m_PadLeft;
     const unsigned int paddingTop  = descriptor.m_PadTop;

     const unsigned int strideX = descriptor.m_StrideX;
     const unsigned int strideY = descriptor.m_StrideY;

     // Accumulation buffer, zero-initialised, one float per output element.
     std::vector<float> outputBuffer(outputShape.GetNumElements(), 0);

     // Decode input and weights up front so the loops below can index them directly.
     const std::vector<float> inputVec = inputDecoder.DecodeTensor(inputShape);
     const std::vector<float> filterVec = weightsDecoder.DecodeTensor(weightsShape);

     for (unsigned int batch = 0u; batch < numBatches; ++batch)
     {
         for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
         {
             for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
             {
                 // Unsigned arithmetic: when the padding exceeds the strided input position these
                 // subtractions wrap around. Adding the weight offset below wraps back for positions
                 // that land inside the output, and the bounds check rejects all the others.
                 unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
                 unsigned int yOutputOrigin = yInput * strideY - paddingTop;

                 for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
                 {
                     for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
                     {
                         for (unsigned int xWeights = 0u; xWeights < weightsWidth; ++xWeights)
                         {
                             unsigned int yOutput = yOutputOrigin + yWeights;
                             unsigned int xOutput = xOutputOrigin + xWeights;

                             // Skip contributions that fall outside the output (this also catches
                             // wrapped-around "negative" positions, which appear as huge values).
                             if (yOutput < outputHeight && xOutput< outputWidth)
                             {
                                 for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
                                 {
                                     unsigned int inputIndex;
                                     unsigned int outputIndex;
                                     unsigned int weightsIndex;

                                     // Flattened offsets: channels innermost for NHWC, width innermost
                                     // for any other layout.
                                     if(descriptor.m_DataLayout == armnn::DataLayout::NHWC)
                                     {
                                         inputIndex   = batch  * inputHeight * inputWidth * inputDepth +
                                                        yInput * inputWidth * inputDepth +
                                                        xInput * inputDepth +
                                                        dInput;

                                         weightsIndex = dOutput  * weightsHeight * weightsWidth * weightsDepth +
                                                        yWeights * weightsWidth * weightsDepth +
                                                        xWeights * weightsDepth +
                                                        dInput;

                                         outputIndex  = batch   * outputHeight * outputWidth * outputDepth +
                                                        yOutput * outputWidth * outputDepth +
                                                        xOutput * outputDepth +
                                                        dOutput;
                                     }
                                     else
                                     {
                                         inputIndex   = batch  * inputDepth * inputHeight * inputWidth +
                                                        dInput * inputHeight * inputWidth +
                                                        yInput * inputWidth +
                                                        xInput;

                                         weightsIndex = dOutput  * weightsDepth * weightsHeight * weightsWidth +
                                                        dInput   * weightsHeight * weightsWidth +
                                                        yWeights * weightsWidth +
                                                        xWeights;

                                         outputIndex  = batch   * outputDepth * outputHeight * outputWidth +
                                                        dOutput * outputHeight * outputWidth +
                                                        yOutput * outputWidth +
                                                        xOutput;
                                     }

                                     // Scatter-accumulate: several (input, weight) pairs can hit the
                                     // same output element.
                                     outputBuffer[outputIndex] += inputVec[inputIndex] * filterVec[weightsIndex];
                                 }
                             }
                         }
                     }

                 }
             }
         }
     }

     // Apply bias (if enabled)
     if (descriptor.m_BiasEnabled)
     {
         // NOTE(review): the encoder is indexed again just before the write-out loop below and is not
         // used in between, so this call looks redundant - confirm before removing.
         outputEncoder[0];
         Decoder<float>& rBiasesDecoder = *biasesDecoder;

         for (unsigned int batch = 0u; batch < numBatches; ++batch)
         {
             for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
             {
                 // One bias value per output channel; operator[] is called for its side effect
                 // (presumably positioning the decoder for the Get() calls below).
                 rBiasesDecoder[dOutput];
                 for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
                 {
                     for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
                     {
                         const unsigned int outputIndex =
                             dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
                         outputBuffer[outputIndex] += rBiasesDecoder.Get();
                     }
                 }
             }
         }
     }
     // Write the accumulated results out in flat order, starting from element 0 of the encoder.
     outputEncoder[0];
     for (float output : outputBuffer)
     {
         outputEncoder.Set(output);
         ++outputEncoder;
     }
 }

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorShape::GetNumElements(), DataLayoutIndexed::GetWidthIndex(), TransposeConvolution2dDescriptor::m_BiasEnabled, TransposeConvolution2dDescriptor::m_DataLayout, TransposeConvolution2dDescriptor::m_PadLeft, TransposeConvolution2dDescriptor::m_PadTop, TransposeConvolution2dDescriptor::m_StrideX, TransposeConvolution2dDescriptor::m_StrideY, NHWC, and Encoder< IType >::Set().

◆ TrueFunc()

bool armnn::TrueFunc	(	Optional< std::string & >	reasonIfUnsupported,
		Params &&...	params
	)

Definition at line 54 of file LayerSupportCommon.hpp.

 {
     // Always reports success. Both arguments are passed to IgnoreUnused only, so neither the
     // reason string nor the forwarded parameters are read or modified here.
     IgnoreUnused(reasonIfUnsupported);
     IgnoreUnused(params...);
     return true;
 }

References IgnoreUnused().

◆ ValidateSourcesMatchOptimizedNetwork()

void armnn::ValidateSourcesMatchOptimizedNetwork	(	std::vector< BackendOptions >	optimizedOptions,
		const INetworkProperties &	networkProperties
	)

This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization.

During optimization the tensor handle factories are chosen based on whether import and export are enabled. If the user then specifies something incompatible here it can lead to problems.

Parameters

optimizedOptions
networkProperties

Definition at line 98 of file LoadedNetwork.cpp.

 {
     // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
     // added as backend options.
     const vector<BackendOptions>::iterator& backendItr =
         find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
             if (backend.GetBackendId().Get() == "Global")
             {
                 return true;
             }
             else
             {
                 return false;
             }
         });
     bool importEnabled = false;
     bool exportEnabled = false;
     if (backendItr != optimizedOptions.end())
     {
         // Find the importEnabled and exportEnabled values.
         for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
         {
             const BackendOptions::BackendOption& option = backendItr->GetOption(i);
             if (option.GetName() == "ImportEnabled")
             {
                 importEnabled = option.GetValue().AsBool();
             }
             if (option.GetName() == "ExportEnabled")
             {
                 exportEnabled = option.GetValue().AsBool();
             }
         }
     }
  
     // Now that we have values for import and export compare them to the MemorySource variables.
     // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
     if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
         (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
     {
         auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
         if (!importEnabled)
         {
             message.append(" requires that memory import be enabled. However, "
                            "it was disabled when this network was optimized.");
         }
         else
         {
             message.append(" requires that memory import be disabled. However, "
                            "it was enabled when this network was optimized.");
         }
         throw InvalidArgumentException(message);
     }
  
     if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
         (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
     {
         auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
         if (!exportEnabled)
         {
             message.append(" requires that memory export be enabled. However, "
                            "it was disabled when this network was optimized.");
         }
         else
         {
             message.append(" requires that memory export be disabled. However, "
                            "it was enabled when this network was optimized.");
         }
         throw InvalidArgumentException(message);
     }
 } // anonymous

◆ VerifyClContextBuffer()

bool armnn::VerifyClContextBuffer ( flatbuffers::Verifier & verifier )

inline

Definition at line 157 of file ClContextSchema_generated.h.

                                    {
   // Forwards to verifier.VerifyBuffer for the armnn::ClContext root type, passing
   // ClContextIdentifier() as its argument, and returns that result unchanged.
   return verifier.VerifyBuffer<armnn::ClContext>(ClContextIdentifier());
 }

References ClContextIdentifier().

◆ VerifySizePrefixedClContextBuffer()

bool armnn::VerifySizePrefixedClContextBuffer ( flatbuffers::Verifier & verifier )

inline

Definition at line 162 of file ClContextSchema_generated.h.

                                    {
   // Forwards to verifier.VerifySizePrefixedBuffer for the armnn::ClContext root type, passing
   // ClContextIdentifier() as its argument, and returns that result unchanged.
   return verifier.VerifySizePrefixedBuffer<armnn::ClContext>(ClContextIdentifier());
 }

References ClContextIdentifier().

◆ VerifyTensorInfoDataType()

void armnn::VerifyTensorInfoDataType	(	const armnn::TensorInfo &	info,
		armnn::DataType	dataType
	)

inline

Definition at line 382 of file TypesUtils.hpp.

 {
     if (info.GetDataType() != dataType)
     {
         std::stringstream ss;
         ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
            << " for tensor:" << info.GetShape()
            << ". The type expected to be: " << armnn::GetDataTypeName(dataType);
         throw armnn::Exception(ss.str());
     }
 }

References GetDataTypeName(), and info.

◆ WrapClError()

RuntimeException WrapClError	(	const cl::Error &	clError,
		const CheckLocation &	location
	)

inline

Definition at line 160 of file ClWorkloadUtils.hpp.

 {
     std::stringstream message;
     message << "CL error: " << clError.what() << ". Error code: " << clError.err();
  
     return RuntimeException(message.str(), location);
 }

References Exception::what().

Referenced by RunClFunction().

Variable Documentation

◆ cpuAccCapabilities

const BackendCapabilities cpuAccCapabilities("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })

Referenced by NeonBackend::GetCapabilities().

◆ cpuRefCapabilities

const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })

Referenced by RefBackend::GetCapabilities().

◆ EXPIRE_RATE

constexpr unsigned int EXPIRE_RATE = 3U

constexpr

Variable to control expire rate of priority queue.

Definition at line 37 of file Types.hpp.

◆ g_AggregateProfilingEventsByInference

constexpr bool g_AggregateProfilingEventsByInference = true

constexpr

Definition at line 37 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults().

◆ g_ProfilingEventCountHint

constexpr std::size_t g_ProfilingEventCountHint = 1024

constexpr

Definition at line 29 of file Profiling.cpp.

Referenced by ProfilerImpl::ProfilerImpl().

◆ g_WriteProfilingEventSequence

constexpr bool g_WriteProfilingEventSequence = true

constexpr

Definition at line 32 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ g_WriteReportToStdOutOnProfilerDestruction

constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false

constexpr

Definition at line 41 of file Profiling.cpp.

Referenced by ProfilerImpl::~ProfilerImpl().

◆ gpuFsaCapabilities

const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })

Referenced by GpuFsaBackend::GetCapabilities().

◆ LOWEST_CAPTURE_PERIOD

constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u

constexpr

The lowest performance data capture interval we support is 10 milliseconds.

Definition at line 34 of file Types.hpp.

◆ MaxNumOfTensorDimensions

constexpr unsigned int MaxNumOfTensorDimensions = 5U

constexpr

Definition at line 31 of file Types.hpp.

Referenced by Concatenate(), CopyTensorContentsGeneric(), PermutationVector::PermutationVector(), armnnUtils::Permuted(), Split(), Splitter(), TosaTensorNumDimensionsWithinBounds::TosaTensorNumDimensionsWithinBounds(), armnnDeserializer::ToTensorInfo(), and armnnUtils::TransposeTensorShape().

◆ oldCpuRefCapabilities

const std::set<armnn::BackendCapability> oldCpuRefCapabilities

Initial value:

{
        armnn::BackendCapability::NonConstWeights,
}

Definition at line 25 of file RefBackend.hpp.

◆ paddingRequiredLayers

const std::set<armnn::LayerType> paddingRequiredLayers

Initial value:

{
    LayerType::ArgMinMax,
    LayerType::Convolution2d,
    LayerType::DepthToSpace,
    LayerType::DepthwiseConvolution2d,
    LayerType::Dequantize,
    LayerType::FullyConnected,
    LayerType::Gather,
    LayerType::Lstm,
    LayerType::Mean,
    LayerType::Permute,
    LayerType::Pooling2d,
    LayerType::Quantize,
    LayerType::QuantizedLstm,
    LayerType::Stack,
    LayerType::TransposeConvolution2d
}

Definition at line 16 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetCapabilities().

◆ tl_Profiler

thread_local IProfiler* tl_Profiler = nullptr

Definition at line 591 of file Profiling.cpp.

Referenced by ProfilerManager::GetProfiler(), and ProfilerManager::RegisterProfiler().

◆ wordSize

constexpr size_t wordSize = sizeof(size_t) * 8

constexpr

Definition at line 22 of file SingleAxisPriorityList.cpp.

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Typedef Documentation

◆ ACLMemManagerOnDemand

◆ AdditionalInfoObjectPtr

◆ BackendCapabilities

◆ BackendIdSet

◆ BackendIdVector

◆ BackendsMap

◆ BaseFloat32ComparisonWorkload

◆ BaseUint8ComparisonWorkload

◆ BFloat16ToFloat32Workload

◆ BindingPointInfo

◆ BooleanWorkload

◆ CompiledBlobDeleter

◆ CompiledBlobPtr

◆ ConcatDescriptor

◆ Coordinates

◆ CopyAndImportFactoryPairs

◆ DebugCallbackFunction

◆ DepthToSpaceDescriptor

◆ Dimensions

◆ DynamicBackendPtr

◆ FactoryId

◆ Float16ToFloat32Workload

◆ Float32ToBFloat16Workload

◆ Float32ToFloat16Workload

◆ Float32Workload

◆ FloatWorkload

◆ Half

◆ HighResolutionClock

◆ IBackendContextUniquePtr

◆ IBackendInternalUniquePtr

◆ IBackendSharedPtr

◆ IBackendUniquePtr

◆ IGpuAccTunedParametersPtr

◆ IInitialiseProfilingService

◆ ILayerSupportSharedPtr

◆ IMemoryManagerUniquePtr

◆ ImportedInputId

◆ ImportedOutputId

◆ INetworkPtr

◆ InferenceTimingPair

◆ InputQueueDescriptor

◆ InputTensors

◆ Int32Workload

◆ IOptimizedNetworkPtr

◆ IReportStructure

◆ IRuntimePtr

◆ LayerBindingId

◆ LayerPriority

◆ LayerTypeOf

◆ LoadedNetworks

◆ LogSoftmaxDescriptor

◆ MemoryOptimizerStrategiesMapRef

◆ MemorySourceFlags

◆ MergerDescriptor

◆ MergerQueueDescriptor

◆ ModelOptions

◆ NetworkId

◆ NetworkImplPtr

◆ NetworkOptions

◆ OutputQueueDescriptor

◆ OutputTensors

◆ ParameterStringifyFunction

◆ PreCompiledObjectDeleter

◆ PreCompiledObjectPtr

◆ RefAdditionWorkload

◆ RefDebugBFloat16Workload

◆ RefDebugFloat16Workload

◆ RefDebugFloat32Workload

◆ RefDebugQAsymmS8Workload

◆ RefDebugQAsymmU8Workload

◆ RefDebugQSymmS16Workload

◆ RefDebugQSymmS8Workload