ArmNN
 24.08
armnn Namespace Reference

Copyright (c) 2021 ARM Limited and Contributors. More...

Namespaces

 experimental
 
 optimizations
 
 profiling
 
 stringUtils
 
 timelinedecoder
 
 utility
 

Classes

struct  abs
 
class  AbsLayer
 
struct  AbsQueueDescriptor
 
struct  ActivationDescriptor
 An ActivationDescriptor for the ActivationLayer. More...
 
class  ActivationLayer
 This layer represents an activation operation with the specified activation function. More...
 
struct  ActivationQueueDescriptor
 
class  AddedLayerObservable
 
class  AdditionLayer
 This layer represents an addition operation. More...
 
struct  AdditionQueueDescriptor
 
struct  Allocator
 
struct  ArgMinMaxDescriptor
 An ArgMinMaxDescriptor for ArgMinMaxLayer. More...
 
class  ArgMinMaxLayer
 This layer represents an ArgMinMax operation. More...
 
struct  ArgMinMaxQueueDescriptor
 
class  ArmNNProfilingServiceInitialiser
 
class  BackendCapabilityException
 
class  BackendId
 
struct  BackendOptions
 Struct for users to pass backend-specific options. More...
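 As a rough illustration (not part of this generated reference): options are grouped per BackendId and typically passed on as ModelOptions or NetworkOptions. The option names "FastMathEnabled" and "TuningLevel" below are assumed examples and may not be recognised by every backend.

    #include <armnn/BackendOptions.hpp>

    void ConfigureBackendOptionsExample()   // hypothetical helper, for illustration
    {
        // Bundle assumed example options for the GpuAcc backend.
        armnn::BackendOptions gpuAccOptions("GpuAcc",
        {
            { "FastMathEnabled", true },   // assumed option name
            { "TuningLevel", 2 }           // assumed option name
        });

        // ModelOptions is simply std::vector<BackendOptions> (see the Typedefs section below).
        armnn::ModelOptions modelOptions{ gpuAccOptions };
    }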
 
class  BackendRegistry
 
struct  BackendSettings
 
class  BackendUnavailableException
 Class for non-fatal exceptions raised while initialising a backend. More...
 
struct  BackendVersion
 
class  BadOptionalAccessException
 
struct  BaseDescriptor
 Base class for all descriptors. More...
 
class  BaseIterator
 
class  BaseMemoryManager
 
class  BaseTensor
 
class  BaseWorkload
 
class  BatchMatMul
 
struct  BatchMatMulDescriptor
 A BatchMatMulDescriptor for the BatchMatMul operator. More...
 
class  BatchMatMulLayer
 
struct  BatchMatMulQueueDescriptor
 
struct  BatchNormalizationDescriptor
 A BatchNormalizationDescriptor for the BatchNormalizationLayer. More...
 
class  BatchNormalizationLayer
 This layer represents a batch normalization operation. More...
 
struct  BatchNormalizationQueueDescriptor
 
struct  BatchToSpaceNdDescriptor
 A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer. More...
 
class  BatchToSpaceNdLayer
 This layer represents a BatchToSpaceNd operation. More...
 
struct  BatchToSpaceNdQueueDescriptor
 
class  BFloat16
 
struct  BiasAndWeightsTypesCompatible
 
struct  BiasAndWeightsTypesMatch
 
class  BindableLayer
 
class  BooleanDecoder
 
class  BooleanDecoderBool
 
class  BooleanEncoder
 
struct  BroadcastLoop
 
struct  BroadcastToDescriptor
 
class  BroadcastToLayer
 
struct  BroadcastToQueueDescriptor
 
struct  BufferStorage
 
struct  Capability
 Capability of the TensorHandleFactory. More...
 
class  CastLayer
 This layer represents a cast operation. More...
 
struct  CastQueueDescriptor
 
struct  ceil
 
struct  ChannelShuffleDescriptor
 A ChannelShuffleDescriptor for the ChannelShuffle operator. More...
 
class  ChannelShuffleLayer
 
struct  ChannelShuffleQueueDescriptor
 
struct  CheckLocation
 
class  ClAbsWorkload
 
class  ClActivationWorkload
 
class  ClAdditionWorkload
 
class  ClArgMinMaxWorkload
 
class  ClBackend
 
class  ClBackendContext
 
class  ClBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  ClBackendModelContext
 The ClBackendModelContext is used to pass in CL specific backend ModelOptions. More...
 
class  ClBaseWorkload
 
class  ClBatchMatMulWorkload
 
class  ClBatchNormalizationFloatWorkload
 
class  ClBatchToSpaceNdWorkload
 
class  ClCastWorkload
 
class  ClChannelShuffleWorkload
 
class  ClComparisonWorkload
 
class  ClConcatWorkload
 
class  ClConstantWorkload
 
struct  ClContextBuilder
 
class  ClContextControl
 
class  ClContextDeserializer
 
class  ClContextSerializer
 
class  ClConvertFp16ToFp32Workload
 
class  ClConvertFp32ToFp16Workload
 
class  ClConvolution2dWorkload
 
class  ClConvolution3dWorkload
 
class  ClDepthToSpaceWorkload
 
class  ClDepthwiseConvolutionWorkload
 
class  ClDequantizeWorkload
 
class  ClDivisionWorkload
 
class  ClElementwiseBinaryWorkload
 
class  ClExpWorkload
 
class  ClFillWorkload
 
class  ClFloorFloatWorkload
 
class  ClFullyConnectedWorkload
 
class  ClGatherNdWorkload
 
class  ClGatherWorkload
 
class  ClImportSubTensorHandle
 
class  ClImportTensorHandle
 
class  ClImportTensorHandleFactory
 This factory creates ClImportTensorHandles that refer to imported memory tensors. More...
 
class  ClInstanceNormalizationWorkload
 
class  ClL2NormalizationFloatWorkload
 
class  ClLayerSupport
 
class  ClLogicalAndWorkload
 
class  ClLogicalNotWorkload
 
class  ClLogicalOrWorkload
 
class  ClLogSoftmaxWorkload
 
class  ClLogWorkload
 
class  ClLstmFloatWorkload
 
class  ClMaximumWorkload
 
class  ClMeanWorkload
 
class  ClMemoryManager
 
class  ClMinimumWorkload
 
class  ClMultiplicationWorkload
 
class  ClNegWorkload
 
class  ClNormalizationFloatWorkload
 
class  ClPadWorkload
 
class  ClPermuteWorkload
 
class  ClPooling2dWorkload
 
class  ClPooling3dWorkload
 
class  ClPreluWorkload
 
class  ClQLstmWorkload
 
class  ClQuantizedLstmWorkload
 
class  ClQuantizeWorkload
 
struct  ClRankWorkload
 
class  ClReduceWorkload
 
class  ClReshapeWorkload
 
class  ClResizeWorkload
 
class  ClReverseV2Workload
 
class  ClRsqrtWorkload
 
class  ClRuntimeUnavailableException
 
class  ClScatterNdWorkload
 
class  ClSinWorkload
 
class  ClSliceWorkload
 
class  ClSoftmaxWorkload
 
class  ClSpaceToBatchNdWorkload
 
class  ClSpaceToDepthWorkload
 
class  ClSplitterWorkload
 
class  ClSqrtWorkload
 
class  ClStackWorkload
 
class  ClStridedSliceWorkload
 
class  ClSubTensorHandle
 
class  ClSubtractionWorkload
 
class  ClTensorDecorator
 ClTensorDecorator wraps an existing CL tensor allowing us to override the TensorInfo for it. More...
 
class  ClTensorHandle
 
class  ClTensorHandleDecorator
 
class  ClTensorHandleFactory
 
class  ClTileWorkload
 
class  ClTransposeConvolution2dWorkload
 
class  ClTransposeWorkload
 
class  ClTunedParameters
 
class  ClUnidirectionalSequenceLstmFloatWorkload
 
class  ClWorkloadFactory
 
struct  ComparisonDescriptor
 A ComparisonDescriptor for the ComparisonLayer. More...
 
class  ComparisonLayer
 This layer represents a comparison operation. More...
 
struct  ComparisonQueueDescriptor
 
class  ConcatLayer
 This layer represents a concatenation (merge) operation. More...
 
struct  ConcatQueueDescriptor
 
class  ConstantLayer
 A layer that constant data can be bound to. More...
 
class  ConstantMemoryStrategy
 
struct  ConstantQueueDescriptor
 
class  ConstPassthroughTensorHandle
 
struct  ConstructInPlace
 Disambiguation tag that can be passed to the constructor to indicate that the contained object should be constructed in-place. More...
 
class  ConstTensor
 A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. More...
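 A minimal sketch of wrapping user-owned memory in a ConstTensor; the shape is arbitrary, and the SetConstant call reflects the behaviour of recent ArmNN releases:

    #include <armnn/Tensor.hpp>
    #include <vector>

    void MakeConstTensorExample()   // hypothetical helper, for illustration
    {
        std::vector<float> weights = { 1.0f, 2.0f, 3.0f, 4.0f };

        armnn::TensorInfo weightsInfo({ 2, 2 }, armnn::DataType::Float32);
        weightsInfo.SetConstant(true);   // recent releases expect the TensorInfo to be flagged constant

        // The ConstTensor does not own the memory; 'weights' must outlive it.
        armnn::ConstTensor weightsTensor(weightsInfo, weights.data());
    }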
 
class  ConstTensorHandle
 
class  ConvertFp16ToFp32Layer
 This layer converts data type Float 16 to Float 32. More...
 
struct  ConvertFp16ToFp32QueueDescriptor
 
class  ConvertFp32ToFp16Layer
 This layer converts data type Float 32 to Float 16. More...
 
struct  ConvertFp32ToFp16QueueDescriptor
 
struct  Convolution2dDescriptor
 A Convolution2dDescriptor for the Convolution2dLayer. More...
 
class  Convolution2dLayer
 This layer represents a convolution 2d operation. More...
 
struct  Convolution2dQueueDescriptor
 
struct  Convolution3dDescriptor
 A Convolution3dDescriptor for the Convolution3dLayer. More...
 
class  Convolution3dLayer
 This layer represents a convolution 3d operation. More...
 
struct  Convolution3dQueueDescriptor
 
class  CopyMemGenericWorkload
 
class  DebugLayer
 This layer visualizes the data flowing through the network. More...
 
struct  DebugQueueDescriptor
 
class  Decoder
 
class  DefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  DepthToSpaceLayer
 This layer represents a DepthToSpace operation. More...
 
struct  DepthToSpaceQueueDescriptor
 
struct  DepthwiseConvolution2dDescriptor
 A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer. More...
 
class  DepthwiseConvolution2dLayer
 This layer represents a depthwise convolution 2d operation. More...
 
struct  DepthwiseConvolution2dQueueDescriptor
 Depthwise Convolution 2D layer workload data. More...
 
class  DequantizeLayer
 This layer dequantizes the input tensor. More...
 
struct  DequantizeQueueDescriptor
 
struct  DetectionPostProcessDescriptor
 
class  DetectionPostProcessLayer
 This layer represents a detection postprocess operator. More...
 
struct  DetectionPostProcessQueueDescriptor
 
class  DeviceSpec
 
class  DivisionLayer
 This layer represents a division operation. More...
 
struct  DivisionQueueDescriptor
 
class  DotAttributeSet
 
class  DotBase
 
class  DotDefaults
 
class  DotEdge
 
class  DotGraph
 
class  DotNode
 
class  DynamicBackend
 
class  DynamicBackendUtils
 
class  ElementwiseBaseLayer
 NOTE: this is an abstract class that encapsulates the elementwise operations; it does not implement: std::unique_ptr<IWorkload> Layer::CreateWorkload(const IWorkloadFactory& factory) const = 0; Layer* Clone(Graph& graph) const = 0. More...
 
struct  ElementwiseBinaryDescriptor
 An ElementwiseBinaryDescriptor for the ElementwiseBinaryLayer. More...
 
struct  ElementwiseBinaryFunction
 
class  ElementwiseBinaryLayer
 This layer represents an elementwise binary operation. More...
 
struct  ElementwiseBinaryQueueDescriptor
 
struct  ElementwiseUnaryDescriptor
 An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. More...
 
struct  ElementwiseUnaryFunction
 
class  ElementwiseUnaryLayer
 This layer represents an elementwise unary operation. More...
 
struct  ElementwiseUnaryQueueDescriptor
 
struct  EmptyOptional
 EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional in a function declaration. More...
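 As an illustrative sketch (the function below is hypothetical), EmptyOptional typically appears as the default for an Optional parameter, mirroring the signature of Optimize() further down this page:

    #include <armnn/Optional.hpp>
    #include <string>
    #include <vector>

    // Hypothetical function using EmptyOptional as the "no messages requested" default.
    void CollectWarnings(armnn::Optional<std::vector<std::string>&> messages = armnn::EmptyOptional())
    {
        if (messages.has_value())
        {
            messages.value().push_back("example warning");
        }
    }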
 
class  Encoder
 
struct  EqualQueueDescriptor
 
class  ErasedLayerNamesObservable
 
class  Event
 Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Event::GetMeasurements() is called. More...
 
class  Exception
 Base class for all ArmNN exceptions so that users can filter to just those. More...
 
class  ExecutionFrame
 
struct  exp
 
struct  FakeQuantizationDescriptor
 A FakeQuantizationDescriptor for the FakeQuantizationLayer. More...
 
class  FakeQuantizationLayer
 This layer represents a fake quantization operation. More...
 
struct  FakeQuantizationQueueDescriptor
 
class  FileNotFoundException
 
struct  FillDescriptor
 A FillDescriptor for the FillLayer. More...
 
class  FillLayer
 This layer represents a fill operation. More...
 
struct  FillQueueDescriptor
 
class  FirstInputTypedWorkload
 
struct  FLATBUFFERS_FINAL_CLASS
 
class  Float16Decoder
 
class  Float16Encoder
 
class  Float32Decoder
 
class  Float32Encoder
 
struct  floorDiv
 
class  FloorLayer
 This layer represents a floor operation. More...
 
struct  FloorQueueDescriptor
 
struct  FullyConnectedDescriptor
 A FullyConnectedDescriptor for the FullyConnectedLayer. More...
 
class  FullyConnectedLayer
 This layer represents a fully connected operation. More...
 
struct  FullyConnectedQueueDescriptor
 
struct  FusedDescriptor
 A FusedDescriptor for the FusedLayer. More...
 
class  FusedLayer
 
struct  FusedQueueDescriptor
 
struct  GatherDescriptor
 A GatherDescriptor for the GatherLayer. More...
 
class  GatherLayer
 This layer represents a Gather operator. More...
 
class  GatherNdLayer
 This layer represents a GatherNd operator. More...
 
struct  GatherNdQueueDescriptor
 
struct  GatherQueueDescriptor
 
class  GpuFsaBackend
 
class  GpuFsaBackendContext
 
class  GpuFsaBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  GpuFsaBaseWorkload
 
class  GpuFsaConstantWorkload
 
class  GpuFsaContextControl
 
class  GpuFsaLayerSupport
 
class  GpuFsaMemoryManager
 
struct  GpuFsaPreCompiledBlob
 A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend. More...
 
class  GpuFsaPreCompiledWorkload
 
class  GpuFsaSubTensorHandle
 
class  GpuFsaTensorHandle
 
class  GpuFsaTensorHandleFactory
 
class  GpuFsaWorkloadFactory
 
class  Graph
 
class  GraphObservable
 
class  GraphValidationException
 
struct  GreaterQueueDescriptor
 
class  HtmlBold
 
class  HtmlFont
 
class  HtmlSection
 
class  HtmlSimpleTag
 
class  IAclTensorHandle
 
class  IBackend
 Each backend should implement an IBackend. More...
 
class  IBackendContext
 
class  IBackendInternal
 
class  IBackendModelContext
 
class  IClTensorHandle
 
class  ICLTensorProxy
 
class  IConnectableLayer
 Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. More...
 
class  ICustomAllocator
 Custom Allocator interface. More...
 
class  IDeviceSpec
 Device specific knowledge to be passed to the optimizer. More...
 
class  IExecutionFrame
 ExecutionFrame interface to enqueue a workload computation. More...
 
class  IGpuAccTunedParameters
 Manages a set of GpuAcc parameters which have been tuned for maximum performance. More...
 
class  IGraphObservable
 
class  IInputSlot
 An input connection slot for a layer. More...
 
class  ILayerSupport
 
class  IMemoryManager
 
class  IMemoryOptimizerStrategy
 
struct  IMemoryOptimizerStrategyFactory
 
class  ImportMemGenericWorkload
 
class  INetwork
 Main network class which provides the interface for building up a neural network. More...
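 A minimal sketch of building a network through this interface: a single ReLU activation between one input and one output, with an arbitrary tensor shape chosen for illustration:

    #include <armnn/ArmNN.hpp>

    void BuildReluNetworkExample()   // hypothetical helper, for illustration
    {
        armnn::INetworkPtr network = armnn::INetwork::Create();

        armnn::IConnectableLayer* input  = network->AddInputLayer(0);

        armnn::ActivationDescriptor reluDesc;
        reluDesc.m_Function = armnn::ActivationFunction::ReLu;
        armnn::IConnectableLayer* relu   = network->AddActivationLayer(reluDesc, "relu");

        armnn::IConnectableLayer* output = network->AddOutputLayer(0);

        armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32);
        input->GetOutputSlot(0).SetTensorInfo(info);
        relu->GetOutputSlot(0).SetTensorInfo(info);

        input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
        relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    }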
 
struct  INetworkProperties
 
class  InputLayer
 A layer to which user-provided data can be bound (e.g. inputs, outputs). More...
 
class  InputSlot
 
struct  InstanceNormalizationDescriptor
 An InstanceNormalizationDescriptor for InstanceNormalizationLayer. More...
 
class  InstanceNormalizationLayer
 This layer represents an instance normalization operation. More...
 
struct  InstanceNormalizationQueueDescriptor
 
class  Instrument
 
class  Int32Decoder
 
class  Int32Encoder
 
class  Int32ToInt32tDecoder
 
class  Int32ToInt32tEncoder
 
class  Int64Decoder
 
class  Int64Encoder
 
class  InvalidArgumentException
 
class  IOptimizedNetwork
 
class  IOutputSlot
 An output connection slot for a layer. More...
 
class  IProfiler
 
class  IRuntime
 
struct  IsHalfType
 
struct  IsMemorySource
 
struct  IsMemorySource< MemorySource >
 
class  IStrategy
 
class  ISubgraphViewConverter
 
class  ITensorHandle
 
class  ITensorHandleFactory
 
class  IWorkload
 Workload interface to enqueue a layer computation. More...
 
class  IWorkloadFactory
 
struct  JsonChildObject
 
class  JsonPrinter
 
class  JsonUtils
 
struct  L2NormalizationDescriptor
 An L2NormalizationDescriptor for the L2NormalizationLayer. More...
 
class  L2NormalizationLayer
 This layer represents an L2 normalization operation. More...
 
struct  L2NormalizationQueueDescriptor
 
class  Layer
 
class  LayerSupportBase
 
class  LayerSupportHandle
 
struct  LayerTypeOfImpl
 
struct  LayerTypeOfImpl< LayerType::Activation >
 
struct  LayerTypeOfImpl< LayerType::Addition >
 
struct  LayerTypeOfImpl< LayerType::ArgMinMax >
 
struct  LayerTypeOfImpl< LayerType::BatchMatMul >
 
struct  LayerTypeOfImpl< LayerType::BatchNormalization >
 
struct  LayerTypeOfImpl< LayerType::BatchToSpaceNd >
 
struct  LayerTypeOfImpl< LayerType::BroadcastTo >
 
struct  LayerTypeOfImpl< LayerType::Cast >
 
struct  LayerTypeOfImpl< LayerType::ChannelShuffle >
 
struct  LayerTypeOfImpl< LayerType::Comparison >
 
struct  LayerTypeOfImpl< LayerType::Concat >
 
struct  LayerTypeOfImpl< LayerType::Constant >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToFp16 >
 
struct  LayerTypeOfImpl< LayerType::Convolution2d >
 
struct  LayerTypeOfImpl< LayerType::Convolution3d >
 
struct  LayerTypeOfImpl< LayerType::Debug >
 
struct  LayerTypeOfImpl< LayerType::DepthToSpace >
 
struct  LayerTypeOfImpl< LayerType::DepthwiseConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::Dequantize >
 
struct  LayerTypeOfImpl< LayerType::DetectionPostProcess >
 
struct  LayerTypeOfImpl< LayerType::Division >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseBinary >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseUnary >
 
struct  LayerTypeOfImpl< LayerType::FakeQuantization >
 
struct  LayerTypeOfImpl< LayerType::Fill >
 
struct  LayerTypeOfImpl< LayerType::Floor >
 
struct  LayerTypeOfImpl< LayerType::FullyConnected >
 
struct  LayerTypeOfImpl< LayerType::Fused >
 
struct  LayerTypeOfImpl< LayerType::Gather >
 
struct  LayerTypeOfImpl< LayerType::GatherNd >
 
struct  LayerTypeOfImpl< LayerType::Input >
 
struct  LayerTypeOfImpl< LayerType::InstanceNormalization >
 
struct  LayerTypeOfImpl< LayerType::L2Normalization >
 
struct  LayerTypeOfImpl< LayerType::LogicalBinary >
 
struct  LayerTypeOfImpl< LayerType::LogSoftmax >
 
struct  LayerTypeOfImpl< LayerType::Lstm >
 
struct  LayerTypeOfImpl< LayerType::Map >
 
struct  LayerTypeOfImpl< LayerType::Maximum >
 
struct  LayerTypeOfImpl< LayerType::Mean >
 
struct  LayerTypeOfImpl< LayerType::MemCopy >
 
struct  LayerTypeOfImpl< LayerType::MemImport >
 
struct  LayerTypeOfImpl< LayerType::Merge >
 
struct  LayerTypeOfImpl< LayerType::Minimum >
 
struct  LayerTypeOfImpl< LayerType::Multiplication >
 
struct  LayerTypeOfImpl< LayerType::Normalization >
 
struct  LayerTypeOfImpl< LayerType::Output >
 
struct  LayerTypeOfImpl< LayerType::Pad >
 
struct  LayerTypeOfImpl< LayerType::Permute >
 
struct  LayerTypeOfImpl< LayerType::Pooling2d >
 
struct  LayerTypeOfImpl< LayerType::Pooling3d >
 
struct  LayerTypeOfImpl< LayerType::PreCompiled >
 
struct  LayerTypeOfImpl< LayerType::Prelu >
 
struct  LayerTypeOfImpl< LayerType::QLstm >
 
struct  LayerTypeOfImpl< LayerType::Quantize >
 
struct  LayerTypeOfImpl< LayerType::QuantizedLstm >
 
struct  LayerTypeOfImpl< LayerType::Rank >
 
struct  LayerTypeOfImpl< LayerType::Reduce >
 
struct  LayerTypeOfImpl< LayerType::Reshape >
 
struct  LayerTypeOfImpl< LayerType::Resize >
 
struct  LayerTypeOfImpl< LayerType::ReverseV2 >
 
struct  LayerTypeOfImpl< LayerType::ScatterNd >
 
struct  LayerTypeOfImpl< LayerType::Shape >
 
struct  LayerTypeOfImpl< LayerType::Slice >
 
struct  LayerTypeOfImpl< LayerType::Softmax >
 
struct  LayerTypeOfImpl< LayerType::SpaceToBatchNd >
 
struct  LayerTypeOfImpl< LayerType::SpaceToDepth >
 
struct  LayerTypeOfImpl< LayerType::Splitter >
 
struct  LayerTypeOfImpl< LayerType::Stack >
 
struct  LayerTypeOfImpl< LayerType::StandIn >
 
struct  LayerTypeOfImpl< LayerType::StridedSlice >
 
struct  LayerTypeOfImpl< LayerType::Subtraction >
 
struct  LayerTypeOfImpl< LayerType::Switch >
 
struct  LayerTypeOfImpl< LayerType::Tile >
 
struct  LayerTypeOfImpl< LayerType::Transpose >
 
struct  LayerTypeOfImpl< LayerType::TransposeConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::UnidirectionalSequenceLstm >
 
struct  LayerTypeOfImpl< LayerType::Unmap >
 
class  LayerValidationException
 
class  LayerWithParameters
 
class  LoadedNetwork
 
struct  log
 
struct  LogicalBinaryDescriptor
 A LogicalBinaryDescriptor for the LogicalBinaryLayer. More...
 
struct  LogicalBinaryFunction
 
class  LogicalBinaryLayer
 This layer represents a Logical Binary operation. More...
 
struct  LogicalBinaryQueueDescriptor
 
struct  LogicalUnaryFunction
 
class  LogSink
 
class  LogSoftmaxLayer
 This layer represents a log softmax operation. More...
 
struct  LogSoftmaxQueueDescriptor
 
struct  LstmBasicParameters
 
struct  LstmDescriptor
 An LstmDescriptor for the LstmLayer. More...
 
struct  LstmInputParams
 
struct  LstmInputParamsInfo
 
class  LstmLayer
 This layer represents an LSTM operation. More...
 
struct  LstmOptCifgParameters
 
struct  LstmOptLayerNormParameters
 
struct  LstmOptPeepholeParameters
 
struct  LstmOptProjectionParameters
 
struct  LstmQueueDescriptor
 
class  ManagedConstTensorHandle
 
class  MapLayer
 This layer represents a memory copy operation. More...
 
struct  MapQueueDescriptor
 
class  MapWorkload
 
struct  maximum
 
class  MaximumLayer
 This layer represents a maximum operation. More...
 
struct  MaximumQueueDescriptor
 
struct  MeanDescriptor
 A MeanDescriptor for the MeanLayer. More...
 
class  MeanLayer
 This layer represents a mean operation. More...
 
struct  MeanQueueDescriptor
 
struct  Measurement
 
struct  MemBin
 
struct  MemBlock
 
class  MemCopyLayer
 This layer represents a memory copy operation. More...
 
struct  MemCopyQueueDescriptor
 
class  MemImportLayer
 This layer represents a memory import operation. More...
 
struct  MemImportQueueDescriptor
 
class  MemoryExportException
 
class  MemoryImportException
 
struct  MemoryInfo
 
class  MemoryManager
 
struct  MemoryRequirements
 
class  MemoryValidationException
 
struct  MemSyncQueueDescriptor
 
class  MergeLayer
 This layer represents a merge operation. More...
 
struct  MergeQueueDescriptor
 
struct  minimum
 
class  MinimumLayer
 This layer represents a minimum operation. More...
 
struct  MinimumQueueDescriptor
 
class  MockTensorHandleFactory
 
class  MultiplicationLayer
 This layer represents a multiplication operation. More...
 
struct  MultiplicationQueueDescriptor
 
class  MultiTypedWorkload
 
class  NeonAbsWorkload
 
class  NeonActivationWorkload
 
class  NeonAdditionWorkload
 
class  NeonArgMinMaxWorkload
 
class  NeonBackend
 
class  NeonBackendModelContext
 The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions. More...
 
class  NeonBaseWorkload
 
class  NeonBatchMatMulWorkload
 
class  NeonBatchNormalizationWorkload
 
class  NeonBatchToSpaceNdWorkload
 
class  NeonCastWorkload
 
class  NeonChannelShuffleWorkload
 
class  NeonComparisonWorkload
 
class  NeonConcatWorkload
 
class  NeonConstantWorkload
 
class  NeonConvertFp16ToFp32Workload
 
class  NeonConvertFp32ToFp16Workload
 
class  NeonConvolution2dWorkload
 
class  NeonConvolution3dWorkload
 
class  NeonDepthToSpaceWorkload
 
class  NeonDepthwiseConvolutionWorkload
 
class  NeonDequantizeWorkload
 
class  NeonDetectionPostProcessWorkload
 
class  NeonDivisionWorkload
 
class  NeonElementwiseBinaryWorkload
 
class  NeonExpWorkload
 
class  NeonFillWorkload
 
class  NeonFloorFloatWorkload
 
class  NeonFullyConnectedWorkload
 
class  NeonFusedWorkload
 
class  NeonGatherNdWorkload
 
class  NeonGatherWorkload
 
class  NeonInstanceNormalizationWorkload
 
class  NeonInterceptorScheduler
 
class  NeonL2NormalizationFloatWorkload
 
class  NeonLayerSupport
 
class  NeonLogicalAndWorkload
 
class  NeonLogicalNotWorkload
 
class  NeonLogicalOrWorkload
 
class  NeonLogSoftmaxWorkload
 
class  NeonLogWorkload
 
class  NeonLstmFloatWorkload
 
class  NeonMaximumWorkload
 
class  NeonMeanWorkload
 
class  NeonMemoryManager
 
class  NeonMinimumWorkload
 
class  NeonMultiplicationWorkload
 
class  NeonNegWorkload
 
class  NeonNormalizationFloatWorkload
 
class  NeonPadWorkload
 
class  NeonPermuteWorkload
 
class  NeonPooling2dWorkload
 
class  NeonPooling3dWorkload
 
class  NeonPreluWorkload
 
class  NeonQLstmWorkload
 
class  NeonQuantizedLstmWorkload
 
class  NeonQuantizeWorkload
 
struct  NeonRankWorkload
 
class  NeonReduceWorkload
 
class  NeonReshapeWorkload
 
class  NeonResizeWorkload
 
class  NeonReverseV2Workload
 
class  NeonRsqrtWorkload
 
class  NeonSinWorkload
 
class  NeonSliceWorkload
 
class  NeonSoftmaxWorkload
 
class  NeonSpaceToBatchNdWorkload
 
class  NeonSpaceToDepthWorkload
 
class  NeonSplitterWorkload
 
class  NeonSqrtWorkload
 
class  NeonStackWorkload
 
class  NeonStridedSliceWorkload
 
class  NeonSubTensorHandle
 
class  NeonSubtractionWorkload
 
class  NeonTensorDecorator
 NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it. More...
 
class  NeonTensorHandle
 
class  NeonTensorHandleDecorator
 
class  NeonTensorHandleFactory
 
class  NeonTileWorkload
 
class  NeonTimer
 
class  NeonTransposeConvolution2dWorkload
 
class  NeonTransposeWorkload
 
class  NeonUnidirectionalSequenceLstmFloatWorkload
 
class  NeonUnidirectionalSequenceLstmWorkload
 
class  NeonWorkloadFactory
 
class  NetworkImpl
 Private implementation of INetwork. More...
 
class  NodeContent
 
struct  NormalizationDescriptor
 A NormalizationDescriptor for the NormalizationLayer. More...
 
class  NormalizationLayer
 This layer represents a normalization operation. More...
 
struct  NormalizationQueueDescriptor
 
struct  NoThrowStrategy
 
struct  NullDescriptor
 Null Descriptor used as a return value from the IConnectableLayer GetParameters method by layers which do not have a descriptor. More...
 
class  NullPointerException
 
class  NullWorkload
 
class  OpenClTimer
 OpenClTimer instrument that times all OpenCL kernels executed between calls to Start() and Stop(). More...
 
class  Optimization
 
struct  OptimizationResult
 
class  OptimizationViews
 
class  OptimizedNetworkImpl
 
class  OptimizeForConnection
 
class  OptimizeForConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForExclusiveConnection
 
class  OptimizeForExclusiveConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForType
 
class  OptimizeForTypeImpl
 Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType. More...
 
class  OptimizeForTypeImpl< Layer, Wrapped >
 Specialization that calls Wrapped::Run() for any layer type. More...
 
class  Optimizer
 
struct  OptimizerOptions
 
class  OptimizerOptionsOpaque
 
struct  OptimizerOptionsOpaqueImpl
 
class  Optional
 
class  OptionalBase
 OptionalBase is the common functionality between reference and non-reference optional types. More...
 
class  OptionalReferenceSwitch
 The default implementation is the non-reference case. More...
 
class  OptionalReferenceSwitch< true, T >
 This is the special case for reference types. More...
 
struct  OriginsDescriptor
 An OriginsDescriptor for the ConcatLayer. More...
 
class  OutputHandler
 
class  OutputLayer
 A layer to which user-provided data can be bound (e.g. inputs, outputs). More...
 
class  OutputSlot
 
struct  PadDescriptor
 A PadDescriptor for the PadLayer. More...
 
class  PadLayer
 This layer represents a pad operation. More...
 
struct  PadQueueDescriptor
 
class  ParseException
 
class  PassthroughTensorHandle
 
class  PerAxisIterator
 PerAxisIterator for per-axis quantization. More...
 
class  PermutationVector
 
struct  PermuteDescriptor
 A PermuteDescriptor for the PermuteLayer. More...
 
class  PermuteLayer
 This layer represents a permutation operation. More...
 
struct  PermuteQueueDescriptor
 
class  PolymorphicDowncastException
 
struct  Pooling2dDescriptor
 A Pooling2dDescriptor for the Pooling2dLayer. More...
 
class  Pooling2dLayer
 This layer represents a pooling 2d operation. More...
 
struct  Pooling2dQueueDescriptor
 
struct  Pooling3dDescriptor
 A Pooling3dDescriptor for the Pooling3dLayer. More...
 
class  Pooling3dLayer
 This layer represents a pooling 3d operation. More...
 
struct  Pooling3dQueueDescriptor
 
struct  power
 
struct  PreCompiledDescriptor
 A PreCompiledDescriptor for the PreCompiledLayer. More...
 
class  PreCompiledLayer
 
struct  PreCompiledQueueDescriptor
 
class  PreluLayer
 
struct  PreluQueueDescriptor
 
class  ProfilerImpl
 
class  ProfilerManager
 
class  ProfilingDetails
 ProfilingDetails class records any details associated with the operator and passes them on for outputting to the user. More...
 
struct  ProgramBuilder
 
class  QASymm8Decoder
 
class  QASymm8Encoder
 
class  QASymmS8Decoder
 
class  QASymmS8Encoder
 
struct  QLstmBasicParameters
 
struct  QLstmDescriptor
 A QLstmDescriptor for the QLstmLayer. More...
 
class  QLstmLayer
 This layer represents a QLstm operation. More...
 
struct  QLstmOptCifgParameters
 
struct  QLstmOptLayerNormParameters
 
struct  QLstmOptPeepholeParameters
 
struct  QLstmOptProjectionParameters
 
struct  QLstmQueueDescriptor
 
class  QSymm16Decoder
 
class  QSymm16Encoder
 
class  QSymm16PerAxisEncoder
 
class  QSymm8PerAxisDecoder
 
class  QSymm8PerAxisEncoder
 
class  QSymmS8Decoder
 
class  QSymmS8Encoder
 
struct  QuantizationParametersAreEqual
 
struct  QuantizedLstmInputParams
 
struct  QuantizedLstmInputParamsInfo
 
class  QuantizedLstmLayer
 This layer represents a QuantizedLstm operation. More...
 
struct  QuantizedLstmParameters
 
struct  QuantizedLstmQueueDescriptor
 
struct  QuantizedMultiplierSmallerThanOne
 Performs multiplication of an integer with a multiplier which is less than one, using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. More...
 
class  QuantizeLayer
 
struct  QuantizeQueueDescriptor
 
struct  QueueDescriptor
 
struct  QueueDescriptorWithParameters
 
class  RangeTracker
 
class  RankLayer
 
struct  RankQueueDescriptor
 
struct  ReduceDescriptor
 A ReduceDescriptor for the REDUCE operators. More...
 
class  ReduceLayer
 This layer represents a reduction operation. More...
 
struct  ReduceQueueDescriptor
 
class  RefActivationWorkload
 
class  RefArgMinMaxWorkload
 
class  RefBackend
 
class  RefBaseWorkload
 
class  RefBatchMatMulWorkload
 
class  RefBatchNormalizationWorkload
 
class  RefBatchToSpaceNdWorkload
 
class  RefBroadcastToWorkload
 
class  RefCastWorkload
 
class  RefChannelShuffleWorkload
 
class  RefComparisonWorkload
 
class  RefConcatWorkload
 
class  RefConstantWorkload
 
class  RefConvertFp16ToFp32Workload
 
class  RefConvertFp32ToFp16Workload
 
class  RefConvolution2dWorkload
 
class  RefConvolution3dWorkload
 
class  RefDebugWorkload
 
class  RefDepthToSpaceWorkload
 
class  RefDepthwiseConvolution2dWorkload
 
class  RefDequantizeWorkload
 
class  RefDetectionPostProcessWorkload
 
class  RefElementwiseBinaryWorkload
 
class  RefElementwiseUnaryWorkload
 
class  RefElementwiseWorkload
 
class  RefFakeQuantizationFloat32Workload
 
class  RefFillWorkload
 
class  RefFloorWorkload
 
class  RefFullyConnectedWorkload
 
class  RefGatherNdWorkload
 
class  RefGatherWorkload
 
class  RefInstanceNormalizationWorkload
 
class  RefL2NormalizationWorkload
 
class  RefLayerSupport
 
class  RefLogicalBinaryWorkload
 
class  RefLogicalUnaryWorkload
 
class  RefLogSoftmaxWorkload
 
class  RefLstmWorkload
 
class  RefMeanWorkload
 
class  RefMemoryManager
 
class  RefNormalizationWorkload
 
class  RefPadWorkload
 
class  RefPermuteWorkload
 
class  RefPooling2dWorkload
 
class  RefPooling3dWorkload
 
class  RefPreluWorkload
 
class  RefQLstmWorkload
 
class  RefQuantizeWorkload
 
struct  RefRankWorkload
 
class  RefReduceWorkload
 
class  RefReshapeWorkload
 
class  RefResizeWorkload
 
class  RefReverseV2Workload
 
class  RefScatterNdWorkload
 
struct  RefShapeWorkload
 
class  RefSliceWorkload
 
class  RefSoftmaxWorkload
 
class  RefSpaceToBatchNdWorkload
 
class  RefSpaceToDepthWorkload
 
class  RefSplitterWorkload
 
class  RefStackWorkload
 
class  RefStridedSliceWorkload
 
class  RefTensorHandle
 
class  RefTensorHandleDecorator
 
class  RefTensorHandleFactory
 
class  RefTileWorkload
 
class  RefTransposeConvolution2dWorkload
 
class  RefTransposeWorkload
 
class  RefUnidirectionalSequenceLstmWorkload
 
class  RefWorkloadFactory
 
struct  ReshapeDescriptor
 A ReshapeDescriptor for the ReshapeLayer. More...
 
class  ReshapeLayer
 This layer represents a reshape operation. More...
 
struct  ReshapeQueueDescriptor
 
struct  ResizeDescriptor
 A ResizeDescriptor for the ResizeLayer. More...
 
class  ResizeLayer
 This layer represents a resize operation. More...
 
struct  ResizeQueueDescriptor
 
struct  ResolveTypeImpl
 
struct  ResolveTypeImpl< DataType::BFloat16 >
 
struct  ResolveTypeImpl< DataType::Boolean >
 
struct  ResolveTypeImpl< DataType::Float16 >
 
struct  ResolveTypeImpl< DataType::Float32 >
 
struct  ResolveTypeImpl< DataType::QAsymmS8 >
 
struct  ResolveTypeImpl< DataType::QAsymmU8 >
 
struct  ResolveTypeImpl< DataType::QSymmS16 >
 
struct  ResolveTypeImpl< DataType::QSymmS8 >
 
struct  ResolveTypeImpl< DataType::Signed32 >
 
struct  ResolveTypeImpl< DataType::Signed64 >
 
class  ReverseV2Layer
 This layer represents a ReverseV2 operation. More...
 
struct  ReverseV2QueueDescriptor
 
struct  rsqrt
 
class  RsqrtLayer
 
struct  RsqrtQueueDescriptor
 
struct  Rule
 
class  RuntimeException
 
struct  RuntimeImpl
 
class  ScaledInt32Decoder
 
class  ScaledInt32PerAxisDecoder
 
struct  ScatterNdDescriptor
 A ScatterNdDescriptor for the ScatterNdLayer. More...
 
class  ScatterNdLayer
 This layer represents a ScatterNd operator. More...
 
struct  ScatterNdQueueDescriptor
 
class  ScopedProfilingEvent
 
struct  ScopedRecord
 
class  ScopedTensorHandle
 
class  ShapeLayer
 
struct  ShapeQueueDescriptor
 
struct  ShapesAreBroadcastCompatible
 
struct  ShapesAreSameRank
 
struct  ShapesAreSameTotalSize
 
class  SimpleLogger
 
struct  sin
 
class  SingleAxisPriorityList
 SingleAxisPriorityList sorts the MemBlocks according to some priority, then tries to place them into as few bins as possible. More...
 
struct  SliceDescriptor
 A SliceDescriptor for the SliceLayer. More...
 
class  SliceLayer
 
struct  SliceQueueDescriptor
 
struct  SoftmaxDescriptor
 A SoftmaxDescriptor for the SoftmaxLayer. More...
 
class  SoftmaxLayer
 This layer represents a softmax operation. More...
 
struct  SoftmaxQueueDescriptor
 
struct  SpaceToBatchNdDescriptor
 A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer. More...
 
class  SpaceToBatchNdLayer
 This layer represents a SpaceToBatchNd operation. More...
 
struct  SpaceToBatchNdQueueDescriptor
 
struct  SpaceToDepthDescriptor
 A SpaceToDepthDescriptor for the SpaceToDepthLayer. More...
 
class  SpaceToDepthLayer
 This layer represents a SpaceToDepth operation. More...
 
struct  SpaceToDepthQueueDescriptor
 
class  SplitterLayer
 This layer represents a split operation. More...
 
struct  SplitterQueueDescriptor
 
struct  sqrt
 
struct  squaredDifference
 
struct  StackDescriptor
 A StackDescriptor for the StackLayer. More...
 
class  StackLayer
 This layer represents a stack operation. More...
 
struct  StackQueueDescriptor
 
class  StandardOutputSink
 
struct  StandInDescriptor
 A StandInDescriptor for the StandIn layer. More...
 
class  StandInLayer
 This layer represents an unknown operation in the input graph. More...
 
class  StrategyBase
 Strategy base class with empty implementations. More...
 
struct  StrategyFactory
 
class  StrategyValidator
 
struct  StridedSliceDescriptor
 A StridedSliceDescriptor for the StridedSliceLayer. More...
 
class  StridedSliceLayer
 This layer represents a strided slice operation. More...
 
struct  StridedSliceQueueDescriptor
 
struct  StringifyLayerParameters
 StringifyLayerParameters allows serializing layer parameters to string. More...
 
struct  StringifyLayerParameters< ActivationDescriptor >
 
struct  StringifyLayerParameters< BatchMatMulDescriptor >
 
struct  StringifyLayerParameters< BatchNormalizationDescriptor >
 
struct  StringifyLayerParameters< BatchToSpaceNdDescriptor >
 
struct  StringifyLayerParameters< ChannelShuffleDescriptor >
 
struct  StringifyLayerParameters< ComparisonDescriptor >
 
struct  StringifyLayerParameters< Convolution2dDescriptor >
 
struct  StringifyLayerParameters< Convolution3dDescriptor >
 
struct  StringifyLayerParameters< DepthwiseConvolution2dDescriptor >
 
struct  StringifyLayerParameters< DetectionPostProcessDescriptor >
 
struct  StringifyLayerParameters< ElementwiseBinaryDescriptor >
 
struct  StringifyLayerParameters< ElementwiseUnaryDescriptor >
 
struct  StringifyLayerParameters< FakeQuantizationDescriptor >
 
struct  StringifyLayerParameters< FullyConnectedDescriptor >
 
struct  StringifyLayerParameters< FusedDescriptor >
 
struct  StringifyLayerParameters< GatherDescriptor >
 
struct  StringifyLayerParameters< L2NormalizationDescriptor >
 
struct  StringifyLayerParameters< LstmDescriptor >
 
struct  StringifyLayerParameters< MeanDescriptor >
 
struct  StringifyLayerParameters< NormalizationDescriptor >
 
struct  StringifyLayerParameters< OriginsDescriptor >
 
struct  StringifyLayerParameters< PadDescriptor >
 
struct  StringifyLayerParameters< PermuteDescriptor >
 
struct  StringifyLayerParameters< Pooling2dDescriptor >
 
struct  StringifyLayerParameters< Pooling3dDescriptor >
 
struct  StringifyLayerParameters< PreCompiledDescriptor >
 
struct  StringifyLayerParameters< ReduceDescriptor >
 
struct  StringifyLayerParameters< ReshapeDescriptor >
 
struct  StringifyLayerParameters< ResizeDescriptor >
 
struct  StringifyLayerParameters< SoftmaxDescriptor >
 
struct  StringifyLayerParameters< SpaceToBatchNdDescriptor >
 
struct  StringifyLayerParameters< SpaceToDepthDescriptor >
 
struct  StringifyLayerParameters< StackDescriptor >
 
struct  StringifyLayerParameters< StridedSliceDescriptor >
 
struct  StringifyLayerParameters< TileDescriptor >
 
struct  StringifyLayerParameters< TransposeConvolution2dDescriptor >
 
struct  StringifyLayerParameters< TransposeDescriptor >
 
struct  StringifyLayerParameters< ViewsDescriptor >
 
struct  StringMapping
 StringMapping is a helper class that allows strings to be used as template parameters, simplifying code which differs only in a string, such as a debug string literal. More...
 
class  SubgraphView
 The SubgraphView class represents a subgraph of a Graph. More...
 
class  SubgraphViewSelector
 Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. More...
 
class  SubtractionLayer
 This layer represents a subtraction operation. More...
 
struct  SubtractionQueueDescriptor
 
class  SwitchLayer
 This layer calculates both true and false outputs for input. More...
 
struct  SwitchQueueDescriptor
 
class  SyncMemGenericWorkload
 
class  Tensor
 A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. More...
 
class  TensorBufferArrayView
 
class  TensorHandle
 
class  TensorHandleFactoryRegistry
 
class  TensorInfo
 
struct  TensorMemory
 
struct  TensorNumDimensionsAreCorrect
 
struct  TensorNumDimensionsAreGreaterOrEqualTo
 
class  TensorShape
 
struct  ThrowingStrategy
 
struct  TileDescriptor
 
class  TileLayer
 
struct  TileQueueDescriptor
 
class  TimeoutException
 
class  TosaRefBackend
 
class  TosaRefBaseWorkload
 
class  TosaRefLayerSupport
 
class  TosaRefMemoryManager
 
class  TosaRefPreCompiledWorkload
 
class  TosaRefTensorHandle
 
class  TosaRefTensorHandleFactory
 
class  TosaRefWorkloadFactory
 
class  TransformIterator
 
struct  TransposeConvolution2dDescriptor
 A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer. More...
 
class  TransposeConvolution2dLayer
 This layer represents a 2D transpose convolution operation. More...
 
struct  TransposeConvolution2dQueueDescriptor
 
struct  TransposeDescriptor
 A TransposeDescriptor for the TransposeLayer. More...
 
class  TransposeLayer
 This layer represents a transpose operation. More...
 
struct  TransposeQueueDescriptor
 
struct  TypeAnyOf
 
class  TypedIterator
 
class  TypedWorkload
 
struct  TypeIs
 
struct  TypeNotPerAxisQuantized
 
struct  TypesAreEqual
 
class  UnidirectionalSequenceLstmLayer
 This layer represents an LSTM operation. More...
 
struct  UnidirectionalSequenceLstmQueueDescriptor
 
class  UnimplementedException
 
class  UnmapLayer
 This layer represents a memory copy operation. More...
 
struct  UnmapQueueDescriptor
 
class  UnmapWorkload
 
struct  ViewsDescriptor
 A ViewsDescriptor for the SplitterLayer. More...
 
class  WallClockTimer
 
class  WorkloadDataCollector
 
class  WorkloadFactoryBase
 
struct  WorkloadInfo
 Contains information about TensorInfos of a layer. More...
 

Typedefs

using BackendIdVector = std::vector< BackendId >
 
using BackendIdSet = std::unordered_set< BackendId >
 
using NetworkOptions = std::vector< BackendOptions >
 
using ModelOptions = std::vector< BackendOptions >
 
using BackendCapabilities = BackendOptions
 
using IBackendInternalUniquePtr = std::unique_ptr< IBackendInternal >
 
using MemoryOptimizerStrategiesMapRef = std::unordered_map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > >
 
using DynamicBackendPtr = std::unique_ptr< DynamicBackend >
 
using IBackendContextUniquePtr = std::unique_ptr< IBackendContext >
 
using ILayerSupportSharedPtr = std::shared_ptr< ILayerSupport >
 
using IMemoryManagerUniquePtr = std::unique_ptr< IMemoryManager >
 
template<typename QueueDescriptor >
using FloatWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Uint8Workload = TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 >
 
template<typename QueueDescriptor >
using Int32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Signed32 >
 
template<typename QueueDescriptor >
using BooleanWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseFloat32ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseUint8ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BFloat16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToBFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16 >
 
template<typename QueueDescriptor >
using Float16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16 >
 
template<typename QueueDescriptor >
using Uint8ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32 >
 
using InputQueueDescriptor = MemCopyQueueDescriptor
 
using OutputQueueDescriptor = MemCopyQueueDescriptor
 
using MergerQueueDescriptor = ConcatQueueDescriptor
 
using LogSoftmaxDescriptor = SoftmaxDescriptor
 A LogSoftmaxDescriptor for the LogSoftmaxLayer. More...
 
using DepthToSpaceDescriptor = SpaceToDepthDescriptor
 A DepthToSpaceDescriptor for the DepthToSpaceLayer. More...
 
using UnidirectionalSequenceLstmDescriptor = LstmDescriptor
 
using ConcatDescriptor = OriginsDescriptor
 
using MergerDescriptor = OriginsDescriptor
 MergerDescriptor is deprecated, use ConcatDescriptor instead. More...
 
using SplitterDescriptor = ViewsDescriptor
 
using INetworkPtr = std::unique_ptr< INetwork, void(*)(INetwork *network)>
 
using IOptimizedNetworkPtr = std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>
 
using CompiledBlobDeleter = std::function< void(const void *)>
 
using CompiledBlobPtr = std::unique_ptr< void, CompiledBlobDeleter >
 
using NetworkId = int
 
using IRuntimePtr = std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>
 
using IGpuAccTunedParametersPtr = std::shared_ptr< IGpuAccTunedParameters >
 The following API is replaced by the backend options API. More...
 
using MemorySourceFlags = unsigned int
 
using BindingPointInfo = std::pair< armnn::LayerBindingId, armnn::TensorInfo >
 
using InputTensors = std::vector< std::pair< LayerBindingId, class ConstTensor > >
 
using OutputTensors = std::vector< std::pair< LayerBindingId, class Tensor > >
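 A sketch of how InputTensors and OutputTensors are commonly populated before calling IRuntime::EnqueueWorkload; binding ids and shapes are assumed for illustration:

    #include <armnn/ArmNN.hpp>
    #include <vector>

    void BindTensorsExample()   // hypothetical helper, for illustration
    {
        std::vector<float> inputData(4, 0.0f);
        std::vector<float> outputData(4, 0.0f);

        armnn::TensorInfo inputInfo({ 1, 4 }, armnn::DataType::Float32);
        inputInfo.SetConstant(true);   // recent releases expect input ConstTensor infos to be flagged constant
        armnn::TensorInfo outputInfo({ 1, 4 }, armnn::DataType::Float32);

        // Each pair is (LayerBindingId, tensor wrapping user-owned memory).
        armnn::InputTensors  inputTensors { { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
        armnn::OutputTensors outputTensors{ { 0, armnn::Tensor(outputInfo, outputData.data()) } };

        // inputTensors/outputTensors would then be passed to IRuntime::EnqueueWorkload().
    }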
 
using IBackendSharedPtr = std::shared_ptr< IBackend >
 
using IBackendUniquePtr = std::unique_ptr< IBackend, void(*)(IBackend *backend)>
 
using LayerBindingId = int
 Type of identifiers for bindable layers (inputs, outputs). More...
 
using ImportedInputId = unsigned int
 
using ImportedOutputId = unsigned int
 
using DebugCallbackFunction = std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)>
 Define the type of callback for the Debug layer to call. More...
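 A sketch of a callback matching this type; it only inspects the tensor shape, and the commented registration via IRuntime::RegisterDebugCallback reflects typical use rather than anything stated on this page:

    #include <armnn/ArmNN.hpp>
    #include <iostream>

    void MakeDebugCallbackExample()   // hypothetical helper, for illustration
    {
        armnn::DebugCallbackFunction debugCallback =
            [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
            {
                std::cout << "debug: slot " << slotIndex << " holds "
                          << tensorHandle->GetShape().GetNumElements() << " elements" << std::endl;
            };

        // Typically registered against a loaded network, e.g.
        // runtime->RegisterDebugCallback(networkId, debugCallback);
    }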
 
using HighResolutionClock = std::chrono::high_resolution_clock::time_point
 Define a timer and associated inference ID for recording execution times. More...
 
using InferenceTimingPair = std::pair< HighResolutionClock, HighResolutionClock >
 
using TensorInfos = std::vector< TensorInfo >
 
using WorkloadQueue = std::vector< std::unique_ptr< IWorkload > >
 
using Coordinates = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using Dimensions = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using LayerPriority = unsigned int
 
using AdditionalInfoObjectPtr = std::shared_ptr< void >
 
using PreCompiledObjectDeleter = std::function< void(const void *)>
 
using PreCompiledObjectPtr = std::unique_ptr< void, PreCompiledObjectDeleter >
 
template<LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl< Type >::Type
 
using NetworkImplPtr = std::unique_ptr< NetworkImpl, void(*)(NetworkImpl *network)>
 
using BackendsMap = std::map< BackendId, std::unique_ptr< class IBackendInternal > >
 
template<DataType DT>
using ResolveType = typename ResolveTypeImpl< DT >::Type
 
using LoadedNetworks = std::unordered_map< NetworkId, std::unique_ptr< LoadedNetwork > >
 
using IReportStructure = arm::pipe::IReportStructure
 
using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService
 
using ParameterStringifyFunction = std::function< void(const std::string &name, const std::string &value)>
 
using FactoryId = ITensorHandleFactory::FactoryId
 
using Half = half_float::half
 
using CopyAndImportFactoryPairs = std::map< ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId >
 
using ACLMemManagerOnDemand = std::shared_ptr< arm_compute::MemoryManagerOnDemand >
 
using RefDebugBFloat16Workload = RefDebugWorkload< DataType::BFloat16 >
 
using RefDebugFloat16Workload = RefDebugWorkload< DataType::Float16 >
 
using RefDebugFloat32Workload = RefDebugWorkload< DataType::Float32 >
 
using RefDebugQAsymmU8Workload = RefDebugWorkload< DataType::QAsymmU8 >
 
using RefDebugQAsymmS8Workload = RefDebugWorkload< DataType::QAsymmS8 >
 
using RefDebugQSymmS16Workload = RefDebugWorkload< DataType::QSymmS16 >
 
using RefDebugQSymmS8Workload = RefDebugWorkload< DataType::QSymmS8 >
 
using RefDebugSigned32Workload = RefDebugWorkload< DataType::Signed32 >
 
using RefDebugSigned64Workload = RefDebugWorkload< DataType::Signed64 >
 
using RefDebugBooleanWorkload = RefDebugWorkload< DataType::Boolean >
 
template<typename DataType = float>
using RefAdditionWorkload = RefElementwiseWorkload< std::plus< DataType >, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute >
 
template<typename DataType = float>
using RefSubtractionWorkload = RefElementwiseWorkload< std::minus< DataType >, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute >
 
template<typename DataType = float>
using RefMultiplicationWorkload = RefElementwiseWorkload< std::multiplies< DataType >, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute >
 
template<typename DataType = float>
using RefDivisionWorkload = RefElementwiseWorkload< std::divides< DataType >, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute >
 
template<typename DataType = float>
using RefMaximumWorkload = RefElementwiseWorkload< armnn::maximum< DataType >, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute >
 
template<typename DataType = float>
using RefMinimumWorkload = RefElementwiseWorkload< armnn::minimum< DataType >, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute >
 
using RefPermuteBFloat16Workload = RefPermuteWorkload< DataType::BFloat16 >
 
using RefPermuteFloat16Workload = RefPermuteWorkload< DataType::Float16 >
 
using RefPermuteFloat32Workload = RefPermuteWorkload< DataType::Float32 >
 
using RefPermuteQAsymmS8Workload = RefPermuteWorkload< DataType::QAsymmS8 >
 
using RefPermuteQAsymm8Workload = RefPermuteWorkload< DataType::QAsymmU8 >
 
using RefPermuteQSymm16Workload = RefPermuteWorkload< DataType::QSymmS16 >
 
using RefTransposeBFloat16Workload = RefTransposeWorkload< DataType::BFloat16 >
 
using RefTransposeFloat16Workload = RefTransposeWorkload< DataType::Float16 >
 
using RefTransposeFloat32Workload = RefTransposeWorkload< DataType::Float32 >
 
using RefTransposeQAsymmS8Workload = RefTransposeWorkload< DataType::QAsymmS8 >
 
using RefTransposeQAsymm8Workload = RefTransposeWorkload< DataType::QAsymmU8 >
 
using RefTransposeQSymm16Workload = RefTransposeWorkload< DataType::QSymmS16 >
 

Enumerations

enum  Compute { Undefined = 0, CpuRef = 1, CpuAcc = 2, GpuAcc = 3 }
 The Compute enum is deprecated and is being replaced by BackendId. More...
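 A short sketch of the replacement: backend selection is expressed with BackendId names rather than the deprecated Compute values.

    #include <armnn/BackendId.hpp>
    #include <vector>

    // Prefer BackendId strings over the deprecated Compute enum.
    std::vector<armnn::BackendId> preferredBackends = { "CpuAcc", "GpuAcc", "CpuRef" };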
 
enum  CapabilityClass { PaddingRequired = 1, FallbackImportDisabled = 2, CapabilityClassMax = 254 }
 Capability class used by the GetCapabilities function so that only the capabilities within the requested scope are chosen for calculation. More...
 
enum  EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget }
 
enum  BoostLogSeverityMapping {
  trace, debug, info, warning,
  error, fatal
}
 
enum  Status { Success = 0, Failure = 1 }
 
enum  DataType {
  Float16 = 0, Float32 = 1, QAsymmU8 = 2, Signed32 = 3,
  Boolean = 4, QSymmS16 = 5, QSymmS8 = 6, QAsymmS8 = 7,
  BFloat16 = 8, Signed64 = 9
}
 
enum  DataLayout { NCHW = 1, NHWC = 2, NDHWC = 3, NCDHW = 4 }
 
enum  ProfilingDetailsMethod { Undefined = 0, DetailsWithEvents = 1, DetailsOnly = 2 }
 Define the behaviour of the internal profiler when outputting network details. More...
 
enum  QosExecPriority { Low = 0, Medium = 1, High = 2 }
 
enum  ActivationFunction {
  Sigmoid = 0, TanH = 1, Linear = 2, ReLu = 3,
  BoundedReLu = 4, SoftReLu = 5, LeakyReLu = 6, Abs = 7,
  Sqrt = 8, Square = 9, Elu = 10, HardSwish = 11,
  Gelu = 12
}
 
enum  ArgMinMaxFunction { Min = 0, Max = 1 }
 
enum  ComparisonOperation {
  Equal = 0, Greater = 1, GreaterOrEqual = 2, Less = 3,
  LessOrEqual = 4, NotEqual = 5
}
 
enum  LogicalBinaryOperation { LogicalAnd = 0, LogicalOr = 1 }
 
enum  UnaryOperation {
  Abs = 0, Exp = 1, Sqrt = 2, Rsqrt = 3,
  Neg = 4, LogicalNot = 5, Log = 6, Sin = 7,
  Ceil = 8
}
 
enum  BinaryOperation {
  Add = 0, Div = 1, Maximum = 2, Minimum = 3,
  Mul = 4, Sub = 5, SqDiff = 6, Power = 7
}
 
enum  PoolingAlgorithm { Max = 0, Average = 1, L2 = 2 }
 
enum  ReduceOperation {
  Sum = 0, Max = 1, Mean = 2, Min = 3,
  Prod = 4
}
 
enum  ResizeMethod { Bilinear = 0, NearestNeighbor = 1 }
 
enum  Dimensionality { NotSpecified = 0, Specified = 1, Scalar = 2 }
 
enum  PaddingMethod { IgnoreValue = 0, Exclude = 1 }
 The padding method modifies the output of pooling layers. More...
 
enum  PaddingMode { Constant = 0, Reflect = 1, Symmetric = 2 }
 The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect). More...
 
enum  NormalizationAlgorithmChannel { Across = 0, Within = 1 }
 
enum  NormalizationAlgorithmMethod { LocalBrightness = 0, LocalContrast = 1 }
 
enum  OutputShapeRounding { Floor = 0, Ceiling = 1 }
 
enum  ShapeInferenceMethod { ValidateOnly = 0, InferAndValidate = 1 }
 The ShapeInferenceMethod modifies how the output shapes are treated. More...
 
enum  MemorySource : uint32_t {
  Undefined = 0, Malloc = 1, DmaBuf = 2, DmaBufProtected = 4,
  Gralloc = 8
}
 Define the Memory Source to reduce copies. More...
 
enum  MemBlockStrategyType { SingleAxisPacking = 0, MultiAxisPacking = 1 }
 
enum  FusedKernelType { AddMulAdd = 0 }
 
enum  BackendCapability : uint32_t { NonConstWeights, AsyncExecution }
 BackendCapability class. More...
 
enum  LayerType {
  X, Activation, Addition, ArgMinMax,
  BatchNormalization, BatchToSpaceNd, Comparison, Concat,
  Constant, ConvertFp16ToFp32, ConvertFp32ToFp16, Convolution2d,
  Debug, DepthToSpace, DepthwiseConvolution2d, Dequantize,
  DetectionPostProcess, Division, ElementwiseUnary, FakeQuantization,
  Fill, Floor, FullyConnected, Gather,
  Input, InstanceNormalization, L2Normalization, LogicalBinary,
  LogSoftmax, Lstm, QLstm, Map,
  Maximum, Mean, MemCopy, MemImport,
  Merge, Minimum, Multiplication, Normalization,
  Output, Pad, Permute, Pooling2d,
  PreCompiled, Prelu, Quantize, QuantizedLstm,
  Reshape, Rank, Resize, Reduce,
  Slice, Softmax, SpaceToBatchNd, SpaceToDepth,
  Splitter, Stack, StandIn, StridedSlice,
  Subtraction, Switch, Transpose, TransposeConvolution2d,
  Unmap, Cast, Shape, UnidirectionalSequenceLstm,
  ChannelShuffle, Convolution3d, Pooling3d, GatherNd,
  BatchMatMul, ElementwiseBinary, ReverseV2, Tile,
  Fused, BroadcastTo, ScatterNd, FirstLayer = Activation,
  LastLayer = ScatterNd
}
 When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below. More...
 
enum  ScatterNdFunction {
  Update = 0, Add = 1, Sub = 2, Max = 3,
  Min = 4, Mul = 5
}
 
enum  LogSeverity {
  Trace, Debug, Info, Warning,
  Error, Fatal
}
 
enum  GraphEvent { LayerAdded, LayerErased }
 
enum  JsonObjectType { Measurement, Event, ExecObjectDesc }
 
enum  TuningLevel { None, Rapid, Normal, Exhaustive }
 

Functions

LayerSupportHandle GetILayerSupportByBackendId (const armnn::BackendId &backend)
 Convenience function to retrieve the ILayerSupportHandle for a backend. More...
 
bool HasCapability (const std::string &name, const BackendCapabilities &capabilities)
 Convenience function to check if a capability exists in a BackendCapabilites struct. More...
 
bool HasCapability (const std::string &name, const armnn::BackendId &backend)
 Convenience function to check if a capability exists in a backend. More...
 
bool HasCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
bool HasMatchingCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasMatchingCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
 Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not it is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const armnn::BackendId &backend)
 Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not it is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...
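 A sketch of querying a capability by name; the name "NonConstWeights" comes from the BackendCapability enum above, and the value handling assumes a boolean option:

    #include <armnn/BackendHelper.hpp>
    #include <iostream>

    void QueryCapabilityExample()   // hypothetical helper, for illustration
    {
        armnn::BackendId cpuRef("CpuRef");

        if (armnn::HasCapability("NonConstWeights", cpuRef))
        {
            auto capability = armnn::GetCapability("NonConstWeights", cpuRef);
            if (capability.has_value() && capability.value().GetValue().IsBool())
            {
                std::cout << "NonConstWeights: " << capability.value().GetValue().AsBool() << std::endl;
            }
        }
    }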
 
unsigned int GetNumberOfCacheFiles (const armnn::BackendId &backend)
 Returns the number of cached files if backend supports caching. More...
 
constexpr char const * GetComputeDeviceAsCString (Compute compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::vector< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::set< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const Compute &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const BackendId &id)
 
template<template< typename... > class TContainer, typename... TContainerTemplateArgs>
std::ostream & operator<< (std::ostream &os, const TContainer< BackendId, TContainerTemplateArgs... > &ids)
 
template<typename F >
void ParseOptions (const std::vector< BackendOptions > &options, BackendId backend, F f)
 
bool ParseBooleanBackendOption (const armnn::BackendOptions::Var &value, bool defaultValue)
 
std::string ParseStringBackendOption (const armnn::BackendOptions::Var &value, std::string defaultValue)
 
int ParseIntBackendOption (const armnn::BackendOptions::Var &value, int defaultValue)
 
BackendRegistryBackendRegistryInstance ()
 
std::ostream & operator<< (std::ostream &os, const BackendVersion &backendVersion)
 
TensorShape GetUnpaddedTensorStrides (const TensorInfo &tensorInfo)
 
DataType GetBiasDataType (DataType inputDataType)
 
template<typename TensorShapeIt >
OriginsDescriptor CreateDescriptorForConcatenation (TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
 Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors. More...
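 As a minimal sketch, a descriptor for concatenating two NCHW tensors along the channel dimension could be built as follows (the shapes are illustrative):

    #include <armnn/Descriptors.hpp>
    #include <armnn/Tensor.hpp>
    #include <array>

    armnn::OriginsDescriptor MakeChannelConcatDescriptor()
    {
        // Two NCHW tensors that differ only in the channel dimension (axis 1).
        std::array<armnn::TensorShape, 2> shapes =
        {
            armnn::TensorShape({1, 3, 8, 8}),
            armnn::TensorShape({1, 5, 8, 8})
        };

        // The resulting descriptor can be passed to INetwork::AddConcatLayer().
        return armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), 1);
    }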
 
template<typename ExceptionType >
void ConditionalThrow (bool condition, const std::string &message)
 
template<typename ExceptionType >
void ConditionalThrow (bool condition)
 
template<typename ExceptionType , typename ComparedType >
void ConditionalThrowIfNotEqual (const std::string &message, const ComparedType &leftHandSide, const ComparedType &rightHandSide)
 ComparedType must support operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&). More...
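 A minimal sketch of using this helper to validate two values; the exception type and message are illustrative:

    #include <armnn/Exceptions.hpp>

    void CheckBatchSize(unsigned int expected, unsigned int actual)
    {
        // Throws the requested exception type when expected != actual,
        // with a message built from both values; otherwise does nothing.
        armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>(
            "Batch size mismatch", expected, actual);
    }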
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Accept legacy OptimizerOptions. More...
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Accept legacy OptimizerOptions. More...
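 A minimal sketch of the usual optimize-and-load flow, assuming a runtime has already been created; the backend preferences and default options are illustrative:

    #include <armnn/ArmNN.hpp>

    armnn::NetworkId OptimizeAndLoad(const armnn::INetwork& network, armnn::IRuntime& runtime)
    {
        // Try the Neon backend first, then fall back to the reference backend.
        std::vector<armnn::BackendId> preferredBackends = { armnn::Compute::CpuAcc,
                                                            armnn::Compute::CpuRef };

        armnn::OptimizerOptionsOpaque options;   // default optimizer options
        std::vector<std::string> messages;       // collects warnings and errors

        armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
            network, preferredBackends, runtime.GetDeviceSpec(), options,
            armnn::Optional<std::vector<std::string>&>(messages));

        armnn::NetworkId networkId{};
        runtime.LoadNetwork(networkId, std::move(optNet));
        return networkId;
    }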
 
std::string LevelToString (LogSeverity level)
 
LogSeverity StringToLogLevel (std::string level)
 
void SetLogFilter (LogSeverity level)
 
void SetAllLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
constexpr LogSeverity ConvertLogSeverity (BoostLogSeverityMapping severity)
 
template<typename Arg , typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg sourceA, Arg sourceB)
 
template<typename Arg , typename ... Args, typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg source, Args... rest)
 
bool CheckFlag (MemorySourceFlags flags, MemorySource source)
 
template<typename T , class... Args>
Optional< T > MakeOptional (Args &&... args)
 Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object. More...
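 For example, an object can be constructed directly inside the Optional, forwarding its constructor arguments (a small sketch):

    #include <armnn/Optional.hpp>
    #include <string>

    armnn::Optional<std::string> MakeName()
    {
        // Constructs the std::string in place (here: 3 copies of 'x').
        return armnn::MakeOptional<std::string>(3, 'x');
    }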
 
const char * GetLayerTypeAsCString (LayerType type)
 
constexpr char const * GetStatusAsCString (Status status)
 
constexpr char const * GetActivationFunctionAsCString (ActivationFunction activation)
 
constexpr char const * GetArgMinMaxFunctionAsCString (ArgMinMaxFunction function)
 
constexpr char const * GetComparisonOperationAsCString (ComparisonOperation operation)
 
constexpr char const * GetBinaryOperationAsCString (BinaryOperation operation)
 
constexpr char const * GetUnaryOperationAsCString (UnaryOperation operation)
 
constexpr char const * GetLogicalBinaryOperationAsCString (LogicalBinaryOperation operation)
 
constexpr char const * GetFusedTypeAsCString (FusedKernelType type)
 
constexpr char const * GetPoolingAlgorithmAsCString (PoolingAlgorithm pooling)
 
constexpr char const * GetOutputShapeRoundingAsCString (OutputShapeRounding rounding)
 
constexpr char const * GetPaddingMethodAsCString (PaddingMethod method)
 
constexpr char const * GetPaddingModeAsCString (PaddingMode mode)
 
constexpr char const * GetReduceOperationAsCString (ReduceOperation reduce_operation)
 
constexpr unsigned int GetDataTypeSize (DataType dataType)
 
template<unsigned N>
constexpr bool StrEqual (const char *strA, const char(&strB)[N])
 
constexpr armnn::Compute ParseComputeDevice (const char *str)
 Deprecated function that will be removed together with the Compute enum. More...
 
constexpr const char * GetDataTypeName (DataType dataType)
 
constexpr const char * GetDataLayoutName (DataLayout dataLayout)
 
constexpr const char * GetNormalizationAlgorithmChannelAsCString (NormalizationAlgorithmChannel channel)
 
constexpr const char * GetNormalizationAlgorithmMethodAsCString (NormalizationAlgorithmMethod method)
 
constexpr const char * GetResizeMethodAsCString (ResizeMethod method)
 
constexpr const char * GetMemBlockStrategyTypeName (MemBlockStrategyType memBlockStrategyType)
 
template<typename T >
constexpr bool IsQuantizedType ()
 
constexpr bool IsQuantized8BitType (DataType dataType)
 
constexpr bool IsQuantizedType (DataType dataType)
 
std::ostream & operator<< (std::ostream &os, Status stat)
 
std::ostream & operator<< (std::ostream &os, const armnn::TensorShape &shape)
 
template<typename QuantizedType >
QuantizedType Quantize (float value, float scale, int32_t offset)
 Quantize a floating point data type into an 8-bit data type. More...
 
template<typename QuantizedType >
float Dequantize (QuantizedType value, float scale, int32_t offset)
 Dequantize an 8-bit data type into a floating point data type. More...
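 A small round-trip sketch using an asymmetric unsigned 8-bit representation; the scale and offset values are illustrative:

    #include <armnn/TypesUtils.hpp>
    #include <cstdint>

    void QuantizeRoundTrip()
    {
        const float   scale  = 0.1f;
        const int32_t offset = 128;

        // 1.5f maps to round(1.5 / 0.1) + 128 = 143.
        uint8_t quantized   = armnn::Quantize<uint8_t>(1.5f, scale, offset);

        // (143 - 128) * 0.1 recovers approximately 1.5f.
        float   dequantized = armnn::Dequantize(quantized, scale, offset);
        (void)dequantized;
    }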
 
void VerifyTensorInfoDataType (const armnn::TensorInfo &info, armnn::DataType dataType)
 
template<typename ... Ts>
void IgnoreUnused (Ts &&...)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Dest >::value &&std::is_integral< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_signed< Source >::value &&std::is_integral< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_floating_point< Source >::value, Dest > numeric_cast (Source sValue)
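 The numeric_cast overloads above act as a range-checked conversion between arithmetic types; in builds where the checks are enabled, out-of-range values are reported instead of silently wrapping. A minimal sketch:

    #include <armnn/utility/NumericCast.hpp>

    unsigned int ToUnsigned(int value)
    {
        // Converts int -> unsigned int, guarding against negative values.
        return armnn::numeric_cast<unsigned int>(value);
    }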
 
template<typename DestType , typename SourceType >
DestType PolymorphicDowncast (SourceType *value)
 Polymorphic downcast for built-in pointers only. More...
 
template<typename DestType , typename SourceType >
auto PolymorphicPointerDowncast (const SourceType &value)
 Polymorphic downcast for shared pointers and built-in pointers. More...
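 A minimal sketch of PolymorphicDowncast; the BaseThing/DerivedThing types are hypothetical, and the assumption is that the cast is verified with dynamic_cast in checked builds while otherwise behaving like a static_cast:

    #include <armnn/utility/PolymorphicDowncast.hpp>

    struct BaseThing    { virtual ~BaseThing() = default; };
    struct DerivedThing : BaseThing { int payload = 42; };

    int ReadPayload(BaseThing* base)
    {
        // The destination type is supplied as a pointer type.
        DerivedThing* derived = armnn::PolymorphicDowncast<DerivedThing*>(base);
        return derived->payload;
    }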
 
std::chrono::high_resolution_clock::time_point GetTimeNow ()
 
std::chrono::duration< double, std::milli > GetTimeDuration (std::chrono::high_resolution_clock::time_point start_time)
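 A small timing sketch using these helpers, assuming they are reachable via armnn/utility/Timer.hpp:

    #include <armnn/utility/Timer.hpp>
    #include <iostream>

    void TimeSomething()
    {
        const auto start = armnn::GetTimeNow();

        // ... work to be measured ...

        // GetTimeDuration returns the elapsed time in milliseconds.
        std::cout << "Took " << armnn::GetTimeDuration(start).count() << " ms\n";
    }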
 
template<typename Function , typename Iterator >
constexpr TransformIterator< Function, Iterator > MakeTransformIterator (Iterator i, Function f)
 
void ConfigureLogging (bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
 Configures the logging behaviour of the ARMNN library. More...
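 For example, to route Info-and-above messages to standard output only (a minimal sketch):

    #include <armnn/Logging.hpp>
    #include <armnn/Utils.hpp>

    void EnableInfoLogging()
    {
        armnn::ConfigureLogging(true,                       // printToStandardOutput
                                false,                      // printToDebugOutput
                                armnn::LogSeverity::Info);

        ARMNN_LOG(info) << "ArmNN logging configured";
    }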
 
bool NeonDetected ()
 
const std::string GetVersion ()
 
float roundf (float value)
 
void swap (OriginsDescriptor &first, OriginsDescriptor &second)
 
void swap (ViewsDescriptor &first, ViewsDescriptor &second)
 
uint32_t GetNumInputs (bool biasEnabled)
 
void AssertNumberOfInputSlots (Layer &layer)
 
template<typename T >
constexpr LayerType LayerEnumOf (const T *=nullptr)
 
template<>
constexpr LayerType LayerEnumOf (const ActivationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const AdditionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ArgMinMaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchMatMulLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchToSpaceNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BroadcastToLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const CastLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ChannelShuffleLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ComparisonLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConcatLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConstantLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToFp16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DebugLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthToSpaceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthwiseConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DequantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DetectionPostProcessLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DivisionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseUnaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FakeQuantizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FillLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FloorLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FullyConnectedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FusedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InstanceNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const L2NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogicalBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogSoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MapLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MaximumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MeanLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemCopyLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemImportLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MergeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MinimumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MultiplicationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const OutputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PadLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PermuteLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreCompiledLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreluLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizedLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const RankLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReduceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReshapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ResizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReverseV2Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ScatterNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ShapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToBatchNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToDepthLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SplitterLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StackLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StandInLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StridedSliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SubtractionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SwitchLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TileLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnidirectionalSequenceLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnmapLayer *)
 
template<typename T , typename V >
void SetValueChecked (Optional< T & > optionalRef, V &&val)
 
template<typename Float16Func , typename Float32Func , typename Uint8Func , typename Int32Func , typename BooleanFunc , typename ... Params>
bool IsSupportedForDataTypeGeneric (Optional< std::string & > reasonIfUnsupported, DataType dataType, Float16Func float16FuncPtr, Float32Func float32FuncPtr, Uint8Func uint8FuncPtr, Int32Func int32FuncPtr, BooleanFunc booleanFuncPtr, Params &&... params)
 
template<typename ... Params>
bool TrueFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncU8 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncI32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
void ValidateSourcesMatchOptimizedNetwork (std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
 This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization. More...
 
void CopyToOutputTensor (const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
 
const armnn::ConstTensor GetInputTensor (const LayerBindingId layerId, const InputTensors &inputTensors)
 
const armnn::Tensor GetOutputTensor (const LayerBindingId layerId, const OutputTensors &outputTensors)
 
template<LogSeverity Level>
void SetLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
void ReportError (const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)
 
void ReportWarning (const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)
 
OptimizationResult ReturnWithError (OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)
 
bool CheckScaleSetOnQuantizedType (Layer *layer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AttemptBackendAssignment (BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > errMessages)
 
std::vector< DataTypeGetLayerInOutDatatype (const Layer *layer)
 
bool CheckFp16Support (BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)
 
void AssignBackendsIConnectable (OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView::IConnectableLayerIterator &firstLayer, SubgraphView::IConnectableLayerIterator &lastLayer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > & > errMessages)
 
BackendsMap CreateSupportedBackends (TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
 
OptimizationResult ApplyBackendOptimizations (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)
 
bool RequiresCopy (ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOption (BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool exportEnabled)
 
EdgeStrategy CalculateEdgeStrategy (BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
OptimizationResult SelectTensorHandleStrategy (Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)
 
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< DebugLayer * > InsertDebugLayerAfter (Graph &graph, Layer &layer, bool toFile)
 
bool RevertConstantWeightsToFP32 (Layer *layer)
 
template<typename T >
void Append (Optimizer::Optimizations &optimizations, T &&optimization)
 
template<typename Front , typename... Others>
void Append (Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
 
template<typename... Args>
Optimizer::Optimizations MakeOptimizations (Args &&... args)
 
Measurement FindMeasurement (const std::string &name, const Event *event)
 
std::vector< Measurement > FindKernelMeasurements (const Event *event)
 
const Event * GetEventPtr (const Event *ptr)
 
const Event * GetEventPtr (const std::unique_ptr< Event > &ptr)
 
int CalcLevel (const Event *eventPtr)
 
void ConfigureDetailsObject (JsonChildObject &detailsObject, std::string layerDetailsStr)
 
void ExtractJsonObjects (unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event * >> descendantsMap)
 
template<typename DescriptorType >
void ProfilingUpdateDescriptions (const std::string &name, const DescriptorType &desc, const WorkloadInfo &infos, const arm::pipe::ProfilingGuid guid)
 
template<typename Delegate >
void ForEachLayerInput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
template<typename Delegate >
void ForEachLayerOutput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
void AssignSplitId (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsReadyForSplitAssignment (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsLayerSupported (const armnn::Layer *layer)
 
bool IsLayerSupported (const armnn::Layer &layer)
 
bool IsLayerOptimizable (const armnn::Layer *layer)
 
bool IsLayerOptimizable (const armnn::Layer &layer)
 
constexpr const char * MockTensorHandleFactoryId ()
 
Graph & GetGraphForTesting (IOptimizedNetwork *optNet)
 
ModelOptions & GetModelOptionsForTesting (IOptimizedNetwork *optNet)
 
arm::pipe::IProfilingService & GetProfilingService (armnn::RuntimeImpl *runtime)
 
std::ostream & operator<< (std::ostream &os, const BFloat16 &b)
 
template<typename LayerType >
LayerType * FuseLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
 
template<typename LayerType >
LayerType * FuseAdditionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseSubtractionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDivisionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseMultiplicationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseElementwiseBinaryLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, BinaryOperation operation, std::string name)
 
template<typename LayerType >
LayerType * FuseBatchNormalizationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDepthwiseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseFullyConnectedLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
std::vector< IConnectableLayer * > ChainReduceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
 
template<typename LayerType >
void ReplaceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer * > &layers)
 
template<typename LayerType >
void ReplaceMultipleLayers (OptimizationViews &optimizationViews, std::vector< IConnectableLayer * > &originalLayers, LayerType *baseLayer, const std::vector< SlotList > inputLayersSlotLists, const std::vector< SlotList > outputLayersSlotLists)
 
TuningLevel ParseTuningLevel (const BackendOptions::Var &value, TuningLevel defaultValue)
 
void ConfigureTuner (arm_compute::CLTuner &tuner, TuningLevel level)
 
arm_compute::NormalizationLayerInfo CreateAclNormalizationLayerInfoForL2Normalization (const armnn::TensorInfo &tensorInfo, armnn::DataLayout dataLayout)
 
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction (ActivationFunction armnnFunction)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor &actDesc)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor *activationDescPtr)
 
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo (const QueueDescriptor &queueDescriptor)
 
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo (uint32_t activationFunction)
 
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl (const ComparisonDescriptor &descriptor)
 
arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType (PoolingAlgorithm poolingAlgorithm)
 
arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType (OutputShapeRounding rounding)
 
arm_compute::NormType ConvertNormalizationAlgorithmChannelToAclNormType (NormalizationAlgorithmChannel channelType)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, arm_compute::ActivationLayerInfo activationLayerInfo)
 
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy (ResizeMethod resizeMethod)
 
template<typename T >
T ComputeSoftmaxAclAxis (const SoftmaxDescriptor &softmaxDesc, const armnn::TensorInfo &tensor)
 
int ComputeAclAxis (const int &armnnAxis, const armnn::TensorInfo &tensor)
 Function to convert an ArmNN axis (left to right) to an ACL axis (right to left), ranging from [-rank, rank). More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dQueueDescriptor queueDescriptor, bool isFastMathEnabled)
 
arm_compute::PaddingMode ConvertPaddingModeToAcl (const PaddingMode &paddingMode)
 
arm_compute::ReductionOperation ConvertReductionOperationToAcl (const ReduceDescriptor &descriptor)
 
const TensorInfo ComputeReductionTensorShape (const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
 Function to compute the output tensor shape based on the reduction axes and whether keepDims is set. More...
 
armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType (armnn::Optional< armnn::DataType > weightsType)
 
template<typename F >
bool CheckSupportRule (F rule, Optional< std::string & > reasonIfUnsupported, const char *reason)
 
template<typename T >
bool AllTypesAreEqualImpl (T)
 
template<typename T , typename... Rest>
bool AllTypesAreEqualImpl (T t1, T t2, Rest... rest)
 
std::unique_ptr< IMemoryOptimizerStrategy > GetMemoryOptimizerStrategy (const std::string &strategyName)
 
const std::vector< std::string > GetMemoryOptimizerStrategyNames ()
 
bool IsNCHW (armnn::Layer &layer)
 
void ReportUntouchedLayers (OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
 
template<typename LayerType >
LayerType * FoldPadLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, PadLayer *padLayer)
 
bool ConnectedToLayerWithNCHW (Layer *baseLayer)
 Checks if the Layer is connected to any Layer that has an NCHW layout. More...
 
bool ConnectedToLayerType (Layer *baseLayer, LayerType layerType, unsigned int dimSize=0)
 Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType. More...
 
void RemoveReshapeLayer (ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
 
template<typename LayerType >
LayerType * FoldPadIntoAveragePool2d (OptimizationViews &optimizationViews, Pooling2dLayer *baseLayer, Pooling2dDescriptor &poolDescriptor, PadLayer *padLayer)
 
bool IsSequenceLayerType (Layer &layer, LayerType type)
 
bool IsSequenceLayerType (Layer &layer, BinaryOperation type)
 
template<typename TYPE >
bool IsLayerSequence (Layer &currentLayer, TYPE first, TYPE second, TYPE third, Layer *layerList[4], bool handleValidActivates, const std::vector< ActivationFunction > &validActivates)
 
armnn::ConstTensor PermuteTensor (const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
 
void ReshapeWeightsForAcl (TensorInfo &weightInfo, DataLayout dataLayout)
 
template<typename DataType >
ConstTensor ReorderWeightChannelsForAcl (const ConstTensor &weightHandle, DataLayout dataLayout, void *permuteBuffer)
 
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl (const TensorInfo &weightInfo, DataLayout dataLayout)
 
std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. More...
 
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl (const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier. More...
 
std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
 Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]. More...
 
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl (const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
 
int32_t ConvertMaskToACLFormat (int32_t mask, int32_t numDim)
 
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices (TensorInfo inputInfo0, TensorInfo inputInfo1)
 Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1). More...
 
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions (unsigned int rank)
 Generates a permutation vector of size rank that permutes the two rightmost dimensions. More...
 
std::set< unsigned int > ComputeSplitAxis (const armnn::SplitterDescriptor &desc, const TensorShape &input)
 Calculates the axis values for the split operation. More...
 
template<typename CopyFunc >
void CopyTensorContentsGeneric (const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
 
template<typename SrcTensorHandleType , typename DstTensorHandleType , typename DescriptorType >
void GatherTensorHandlePairs (const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType * >> &tensorHandlePairs)
 
constexpr const char * ClBackendId ()
 
flatbuffers::Offset< ClContext > CreateClContext (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
 
flatbuffers::Offset< ClContext > CreateClContextDirect (flatbuffers::FlatBufferBuilder &_fbb, const std::vector< flatbuffers::Offset< armnn::Program >> *programs=nullptr)
 
flatbuffers::Offset< Program > CreateProgram (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
 
flatbuffers::Offset< Program > CreateProgramDirect (flatbuffers::FlatBufferBuilder &_fbb, const char *name=nullptr, const std::vector< uint8_t > *binary=nullptr)
 
const armnn::ClContext * GetClContext (const void *buf)
 
const armnn::ClContext * GetSizePrefixedClContext (const void *buf)
 
const char * ClContextIdentifier ()
 
bool ClContextBufferHasIdentifier (const void *buf)
 
bool VerifyClContextBuffer (flatbuffers::Verifier &verifier)
 
bool VerifySizePrefixedClContextBuffer (flatbuffers::Verifier &verifier)
 
const char * ClContextExtension ()
 
void FinishClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
void FinishSizePrefixedClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
constexpr const char * ClImportTensorHandleFactoryId ()
 
constexpr const char * ClTensorHandleFactoryId ()
 
arm_compute::Status ClAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status ClAdditionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status ClBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status ClCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status ClComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status ClConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status ClConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status ClConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFloorWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status ClGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status ClInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status ClL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status ClLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status ClLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMeanValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status ClMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status ClPadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status ClPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status ClPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status ClPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status ClPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status ClQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status ClReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status ClReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)
 
arm_compute::Status ClRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClScatterNdWorkloadValidate (const TensorInfo &inputInfo, const TensorInfo &indicesInfo, const TensorInfo &updatesInfo, const TensorInfo &outputInfo, const ScatterNdDescriptor &descriptor)
 
arm_compute::Status ClSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status ClSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status ClSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status ClSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status ClSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status ClSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status ClStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status ClSubtractionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)
 
arm_compute::Status ClTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status ClTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
std::string GetConvolutionMethodString (arm_compute::ConvolutionMethod &convolutionMethod)
 
template<typename T >
void CopyArmComputeClTensorData (arm_compute::CLTensor &dstTensor, const T *srcData)
 
auto SetClStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetClSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
void InitializeArmComputeClTensorData (arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)
 
RuntimeException WrapClError (const cl::Error &clError, const CheckLocation &location)
 
void RunClFunction (arm_compute::IFunction &function, const CheckLocation &location)
 
template<typename DataType , typename PayloadType >
DataType * GetOutputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename T >
void DeleteAsType (const void *const blob)
 
SubgraphView::InputSlots CreateInputsFrom (Layer *layer)
 
SubgraphView::OutputSlots CreateOutputsFrom (Layer *layer)
 
SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom (SubgraphView::InputSlots &&inputs, SubgraphView::OutputSlots &&outputs, SubgraphView::Layers &&layers)
 
constexpr const char * GpuFsaBackendId ()
 
template<typename ... Args>
bool IsGpuFsaBackendSupported (Optional< std::string & > reasonIfUnsupported, Args... args)
 
constexpr const char * GpuFsaTensorHandleFactoryId ()
 
template<DataType ArmnnType>
bool IsDataType (const WorkloadInfo &info)
 
arm_compute::Status GpuFsaActivationValidate (const TensorInfo &input, const ActivationDescriptor &descriptor)
 
void GpuFsaActivationCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ActivationDescriptor &descriptor)
 
arm_compute::Status GpuFsaBatchMatMulValidate (const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
 
void GpuFsaBatchMatMulCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
 
arm_compute::Status GpuFsaCastValidate (const TensorInfo &input, const TensorInfo &output)
 
void GpuFsaCastCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status GpuFsaConvolution2dValidate (const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
void GpuFsaConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status GpuFsaDepthwiseConvolution2dValidate (const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
void GpuFsaDepthwiseConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status GpuFsaElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
 
void GpuFsaElementwiseBinaryCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
 
arm_compute::Status GpuFsaPooling2dValidate (const TensorInfo &input, const Pooling2dDescriptor &descriptor)
 
void GpuFsaPooling2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status GpuFsaReshapeValidate (const TensorInfo &input, const ReshapeDescriptor &descriptor)
 
void GpuFsaReshapeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ReshapeDescriptor &descriptor)
 
arm_compute::Status GpuFsaResizeValidate (const TensorInfo &input, const ResizeDescriptor &descriptor)
 
void GpuFsaResizeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ResizeDescriptor &descriptor)
 
arm_compute::Status GpuFsaSoftmaxValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
void GpuFsaSoftmaxCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status GpuFsaConstantWorkloadValidate (const TensorInfo &output)
 
bool GpuFsaPreCompiledWorkloadValidate (std::string *reasonIfUnsupported)
 
constexpr const char * NeonBackendId ()
 
bool CollapseLeadingUnitDimensions (const TensorInfo &in, TensorInfo &out)
 
template<typename SlotListType >
void BuildAddMulAddSlotLists (bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
 
void GetFusedName (Layer *layerList[4], std::string &fusedName)
 
template<typename Type >
bool BuildAddMulAddTensorInfoLists (Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
 
bool IsLayerTypeSupported (const LayerType &type, const std::vector< TensorInfo > &infos, const BaseDescriptor &descriptor, const Optional< LstmInputParamsInfo > &lstmParamsInfo, const Optional< QuantizedLstmInputParamsInfo > &quantizedLstmParamsInfo, Optional< std::string & > reasonIfUnsupported, const NeonLayerSupport &support)
 
constexpr const char * NeonTensorHandleFactoryId ()
 
arm_compute::Status NeonAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status NeonAdditionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status NeonBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status NeonCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status NeonComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status NeonConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status NeonConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::DetectionPostProcessLayerInfo MakeInfo (const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDetectionPostProcessValidate (const TensorInfo &boxEncodings, const TensorInfo &scores, const TensorInfo &anchors, const TensorInfo &detectionBoxes, const TensorInfo &detectionClasses, const TensorInfo &detectionScores, const TensorInfo &numDetections, const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonElementwiseBinaryWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonFusedWorkloadValidate (const std::vector< std::reference_wrapper< TensorInfo >> &inputInfos, const std::vector< std::reference_wrapper< TensorInfo >> &outputInfos, const FusedDescriptor &fusedDescriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status NeonGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status NeonInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status NeonL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonMeanWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status NeonMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 Validation function for the inputs and output. More...
 
arm_compute::Status NeonMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonPadWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status NeonPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status NeonPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status NeonPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status NeonPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status NeonQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status NeonReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status NeonReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)
 
arm_compute::Status NeonRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status NeonSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status NeonSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status NeonSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status NeonStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status NeonSubtractionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)
 
arm_compute::Status NeonTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status NeonTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
template<typename T >
void CopyArmComputeTensorData (arm_compute::Tensor &dstTensor, const T *srcData)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, TensorInfo tensorInfo, const ITensorHandle *handle)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, const ConstTensorHandle *handle)
 
auto SetNeonStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetNeonSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
constexpr const char * RefBackendId ()
 
constexpr const char * RefTensorHandleFactoryId ()
 
bool IsSigned64 (const WorkloadInfo &info)
 
bool IsSigned32 (const WorkloadInfo &info)
 
bool IsBFloat16 (const WorkloadInfo &info)
 
bool IsFloat16 (const WorkloadInfo &info)
 
bool IsQSymmS16 (const WorkloadInfo &info)
 
bool IsQSymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmU8 (const WorkloadInfo &info)
 
bool IsBoolean (const WorkloadInfo &info)
 
template<typename QueueDescriptorType >
constexpr bool IsOperationQueueDescriptor (const QueueDescriptorType &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const MemCopyQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const ConstantQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const PermuteQueueDescriptor &)
 
float Activation (float in, ActivationFunction function, float a, float b)
 
void Activation (Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
 
template<typename OUT >
void ArgMinMax (Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int32_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int64_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
void BatchNormImpl (const BatchNormalizationQueueDescriptor &data, Decoder< float > &meanDecoder, Decoder< float > &varianceDecoder, Decoder< float > &betaDecoder, Decoder< float > &gammaDecoder, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
unsigned int Offset (const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
 
void BatchToSpaceNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const BatchToSpaceNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Concatenate (const ConcatQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
void Convolve3d (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int paddingFront, unsigned int xStride, unsigned int yStride, unsigned int zStride, unsigned int xDilation, unsigned int yDilation, unsigned int zDilation)
 
void Convolve (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
 
template<typename T >
void PrintOutput (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, std::ostream &os)
 
template<typename T >
void Debug (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< BFloat16 > (const TensorInfo &inputInfo, const BFloat16 *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< Half > (const TensorInfo &inputInfo, const Half *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< float > (const TensorInfo &inputInfo, const float *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< uint8_t > (const TensorInfo &inputInfo, const uint8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int8_t > (const TensorInfo &inputInfo, const int8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int16_t > (const TensorInfo &inputInfo, const int16_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int32_t > (const TensorInfo &inputInfo, const int32_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int64_t > (const TensorInfo &inputInfo, const int64_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template<typename T >
std::unique_ptr< Decoder< T > > MakeDecoder (const TensorInfo &info, const void *data=nullptr)
 
template<>
std::unique_ptr< Decoder< float > > MakeDecoder (const TensorInfo &info, const void *data)
 
void DepthToSpace (const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Dequantize (Decoder< float > &inputDecoder, Encoder< float > &outputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
std::vector< unsigned int > GenerateRangeK (unsigned int k)
 
void TopKSort (unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
 
float IntersectionOverUnion (const float *boxI, const float *boxJ)
 
std::vector< unsigned int > NonMaxSuppression (unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
 
void AllocateOutputData (unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
 
void DetectionPostProcess (const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &, const TensorInfo &detectionBoxesInfo, const TensorInfo &, const TensorInfo &, const TensorInfo &, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
 
template<typename T >
std::unique_ptr< Encoder< T > > MakeEncoder (const TensorInfo &info, void *data=nullptr)
 
template<>
std::unique_ptr< Encoder< float > > MakeEncoder (const TensorInfo &info, void *data)
 
void Fill (Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
 Creates a tensor and fills it with a scalar value. More...
 
void FullyConnected (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *rBiasDecoder, bool biasEnabled, unsigned int K, bool transposeWeights)
 Performs a matrix multiplication and optionally adds a bias. More...
 
void Gather (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis_int)
 
void InstanceNorm (const InstanceNormalizationQueueDescriptor &data, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void LogSoftmax (Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
 
void LstmImpl (const LstmDescriptor &descriptor, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const TensorShape &inputToOutputWeightsShape, const TensorShape &recurrentToOutputWeightsShape, std::unique_ptr< Decoder< float >> &inputData, std::unique_ptr< Decoder< float >> &outputStateIn, std::unique_ptr< Decoder< float >> &cellStateIn, std::unique_ptr< Encoder< float >> &outputStateOut, std::unique_ptr< Encoder< float >> &cellStateOut, std::unique_ptr< Encoder< float >> &output, std::unique_ptr< Decoder< float >> &cellStateOutDecoder, std::unique_ptr< Decoder< float >> &outputDecoder, std::unique_ptr< Decoder< float >> &inputToInputWeightsTensor, std::unique_ptr< Decoder< float >> &inputToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &inputToCellWeightsTensor, std::unique_ptr< Decoder< float >> &inputToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToInputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToCellWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToInputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &cellToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &inputGateBiasTensor, std::unique_ptr< Decoder< float >> &forgetGateBiasTensor, std::unique_ptr< Decoder< float >> &cellBiasTensor, std::unique_ptr< Decoder< float >> &outputGateBiasTensor, std::unique_ptr< Decoder< float >> &projectionWeightsTensor, std::unique_ptr< Decoder< float >> &projectionBiasTensor, std::unique_ptr< Decoder< float >> &inputLayerNormWeights, std::unique_ptr< Decoder< float >> &forgetLayerNormWeights, std::unique_ptr< Decoder< float >> &cellLayerNormWeights, std::unique_ptr< Decoder< float >> &outputLayerNormWeights, std::unique_ptr< Encoder< float >> &inputGateScratch, std::unique_ptr< Encoder< float >> &cellScratch, std::unique_ptr< Encoder< float >> &forgetGateScratch, std::unique_ptr< Encoder< float >> &outputGateScratch, std::unique_ptr< Decoder< float >> &inputGateScratchDecoder, std::unique_ptr< Decoder< float >> &cellScratchDecoder, std::unique_ptr< Decoder< float >> &forgetGateScratchDecoder, std::unique_ptr< Decoder< float >> &outputGateScratchDecoder, float layerNormEpsilon)
 
void MirrorPad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pooling2d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
 Computes the Pooling2d operation. More...
 
void Pooling3d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
 Computes the Pooling3d operation. More...
 
void PreluImpl (const TensorInfo &inputInfo, const TensorInfo &alphaInfo, const TensorInfo &outputInfo, Decoder< float > &inputData, Decoder< float > &alphaData, Encoder< float > &outputData)
 
bool NextIndex (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
 
unsigned int ReducedOutputOffset (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
 
void Reduce (const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
 
template<typename DataType >
void ExecuteFunction (std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs, BinaryOperation operation, const std::string &layerName="")
 
void FakeQuantization (const float *inputData, float *outputData, uint32_t numElements, float min, float max)
 
unsigned int GetNumActivations (const TensorInfo &inputInfo)
 
template<typename TensorHandleType = RefTensorHandle>
const TensorInfoGetTensorInfo (const ITensorHandle *tensorHandle)
 float32 helpers More...
 
template<typename DataType , typename PayloadType >
const DataTypeGetInputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename DataType >
DataTypeGetOutputTensorData (ITensorHandle *tensorHandle)
 
template<typename PayloadType >
const float * GetInputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
float * GetOutputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const HalfGetInputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
HalfGetOutputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const BFloat16GetInputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
BFloat16GetOutputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename T >
std::vector< float > Dequantize (const T *quant, const TensorInfo &info)
 u8 helpers More...
 
template<typename T >
void Dequantize (const T *inputData, float *outputData, const TensorInfo &info)
 
void Quantize (uint8_t *quant, const float *dequant, const TensorInfo &info)
 
void Resize (Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
 
std::vector< unsigned int > ReverseGetMultIdx (const unsigned int idx, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
 
unsigned int ReverseGetFlatIdx (const std::vector< unsigned int > &idxList, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
 
unsigned int ReverseRelocateIdx (unsigned int idx, unsigned int inputRank, std::vector< bool > &axisFlag, std::vector< unsigned int > &dimSize, std::vector< unsigned int > &elementNumInner)
 
void ReverseV2 (const TensorInfo &inputInfo, const TensorInfo &axisInfo, Decoder< float > &inputDecoder, Decoder< int > &axisDecoder, Encoder< float > &outputEncoder)
 
float ScatterOperation (ScatterNdFunction operation, float input, float update)
 
void ScatterNd (const TensorInfo &inputInfo, const TensorInfo &indicesInfo, const TensorInfo &updatesInfo, Decoder< float > &input, Decoder< int > &indices, Decoder< float > &updates, Encoder< float > &output, ScatterNdDescriptor descriptor)
 
void ScatterNd (const TensorInfo &indicesInfo, const TensorInfo &updatesInfo, const TensorInfo &shapeInfo, Decoder< int > &indices, Decoder< float > &updates, Decoder< int > &shape, Encoder< float > &output, ScatterNdDescriptor descriptor)
 
void Slice (const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Softmax (Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
 Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. More...
 
unsigned int GetOffset (const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
 
void SpaceToBatchNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void SpaceToDepth (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Split (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
template<typename DataType >
void Splitter (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
void Stack (const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
void StridedSlice (const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
std::vector< uint32_t > IndexToCoordinates (std::vector< uint32_t > &shape, uint32_t index)
 
uint32_t CoordinatesToIndex (TensorShape &shape, std::vector< uint32_t > &coordinates)
 
void Tile (const TileDescriptor &params, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void TransposeConvolution2dImpl (const TransposeConvolution2dDescriptor &descriptor, const TensorShape &inputShape, Decoder< float > &inputDecoder, const TensorShape &outputShape, Encoder< float > &outputEncoder, const TensorShape &weightsShape, Decoder< float > &weightsDecoder, Decoder< float > *biasesDecoder)
 
constexpr const char * TosaRefBackendId ()
 
constexpr const char * TosaRefTensorHandleFactoryId ()
 
bool TosaRefPreCompiledWorkloadValidate (std::string *)
 

Variables

constexpr unsigned int MaxNumOfTensorDimensions = 5U
 
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
 The lowest performance data capture interval we support is 10 milliseconds. More...
 
constexpr unsigned int EXPIRE_RATE = 3U
 Variable to control expire rate of priority queue. More...
 
constexpr std::size_t g_ProfilingEventCountHint = 1024
 
constexpr bool g_WriteProfilingEventSequence = true
 
constexpr bool g_AggregateProfilingEventsByInference = true
 
constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
 
thread_local IProfilertl_Profiler = nullptr
 
constexpr size_t wordSize = sizeof(size_t) * 8
 
const BackendCapabilities gpuFsaCapabilities ("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
 
const BackendCapabilities cpuAccCapabilities ("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })
 
const std::set< armnn::LayerTypepaddingRequiredLayers
 
const BackendCapabilities cpuRefCapabilities ("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })
 
const std::set< armnn::BackendCapabilityoldCpuRefCapabilities
 

Detailed Description

Copyright (c) 2021 ARM Limited and Contributors.

Optional is a drop-in replacement for std::optional until we migrate to C++17.

Copyright (c) 2021-2024 ARM Limited, Arm Ltd and Contributors.

All rights reserved.

SPDX-License-Identifier: MIT


Only the subset of the optional features that we intend to use in ArmNN is implemented. There are two distinct implementations here:

  • 1, for normal constructable/destructable types and reference types
  • 2, for reference types

The std::optional features we support are:

  • has_value() and operator bool() to tell if the optional has a value
  • value() returns a reference to the held object

Typedef Documentation

◆ ACLMemManagerOnDemand

using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>

Definition at line 22 of file NeonFullyConnectedWorkload.cpp.

◆ AdditionalInfoObjectPtr

using AdditionalInfoObjectPtr = std::shared_ptr<void>

Definition at line 228 of file Layer.hpp.

◆ BackendCapabilities

Definition at line 19 of file BackendOptions.hpp.

◆ BackendIdSet

using BackendIdSet = std::unordered_set<BackendId>

Definition at line 193 of file BackendId.hpp.

◆ BackendIdVector

using BackendIdVector = std::vector<BackendId>

Definition at line 192 of file BackendId.hpp.

◆ BackendsMap

using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal> >

Definition at line 285 of file Network.hpp.

◆ BaseFloat32ComparisonWorkload

◆ BaseUint8ComparisonWorkload

◆ BFloat16ToFloat32Workload

◆ BindingPointInfo

Definition at line 276 of file Tensor.hpp.

◆ BooleanWorkload

◆ CompiledBlobDeleter

typedef std::function< void(const void *)> CompiledBlobDeleter

Definition at line 342 of file INetwork.hpp.

◆ CompiledBlobPtr

typedef std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr

Definition at line 343 of file INetwork.hpp.

◆ ConcatDescriptor

Definition at line 60 of file DescriptorsFwd.hpp.

◆ Coordinates

using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 15 of file InternalTypes.hpp.

◆ CopyAndImportFactoryPairs

◆ DebugCallbackFunction

using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>

Define the type of callback for the Debug layer to call.

Parameters
guid - guid of the layer connected to the input of the Debug layer
slotIndex - index of the output slot connected to the input of the Debug layer
tensorHandle - TensorHandle for the input tensor to the Debug layer

Definition at line 398 of file Types.hpp.
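As an illustration, a minimal sketch of such a callback, assuming the IRuntime::RegisterDebugCallback entry point available in recent releases; the callback body is purely illustrative:

#include <armnn/IRuntime.hpp>
#include <armnn/Types.hpp>
#include <iostream>

// Sketch only: print the slot index and element count of every tensor the
// Debug layer sees.
void AttachDebugCallback(armnn::IRuntime& runtime, armnn::NetworkId networkId)
{
    armnn::DebugCallbackFunction callback =
        [](armnn::LayerGuid /*guid of the producing layer*/,
           unsigned int slotIndex,
           armnn::ITensorHandle* tensorHandle)
        {
            std::cout << "Debug: output slot " << slotIndex << " holds "
                      << tensorHandle->GetShape().GetNumElements() << " elements\n";
        };

    runtime.RegisterDebugCallback(networkId, callback);
}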

◆ DepthToSpaceDescriptor

A DepthToSpaceDescriptor for the DepthToSpaceLayer.

Definition at line 1099 of file Descriptors.hpp.

◆ Dimensions

using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 16 of file InternalTypes.hpp.

◆ DynamicBackendPtr

using DynamicBackendPtr = std::unique_ptr<DynamicBackend>

Definition at line 54 of file DynamicBackend.hpp.

◆ FactoryId

◆ Float16ToFloat32Workload

◆ Float32ToBFloat16Workload

◆ Float32ToFloat16Workload

◆ Float32Workload

◆ FloatWorkload

◆ Half

typedef half_float::half Half

Definition at line 22 of file Half.hpp.

◆ HighResolutionClock

using HighResolutionClock = std::chrono::high_resolution_clock::time_point

Define a timer and associated inference ID for recording execution times.

Definition at line 401 of file Types.hpp.

◆ IBackendContextUniquePtr

using IBackendContextUniquePtr = std::unique_ptr<IBackendContext>

Definition at line 34 of file IBackendContext.hpp.

◆ IBackendInternalUniquePtr

typedef std::unique_ptr< IBackendInternal > IBackendInternalUniquePtr

Definition at line 32 of file BackendRegistry.hpp.

◆ IBackendSharedPtr

using IBackendSharedPtr = std::shared_ptr<IBackend>

Definition at line 282 of file Types.hpp.

◆ IBackendUniquePtr

using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>

Definition at line 283 of file Types.hpp.

◆ IGpuAccTunedParametersPtr

The following API is replaced by the backend options API.

Definition at line 301 of file IRuntime.hpp.

◆ IInitialiseProfilingService

using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

Definition at line 28 of file Runtime.hpp.

◆ ILayerSupportSharedPtr

using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>

Definition at line 40 of file ILayerSupport.hpp.

◆ IMemoryManagerUniquePtr

using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>

Definition at line 24 of file IMemoryManager.hpp.

◆ ImportedInputId

using ImportedInputId = unsigned int

Definition at line 310 of file Types.hpp.

◆ ImportedOutputId

using ImportedOutputId = unsigned int

Definition at line 311 of file Types.hpp.

◆ INetworkPtr

using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>

Definition at line 339 of file INetwork.hpp.
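A minimal usage sketch: an INetworkPtr is normally obtained from INetwork::Create() and owns the network until it is optimized. The single pass-through input/output graph below is purely illustrative.

#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

// Sketch: build a trivial graph that just forwards its input.
armnn::INetworkPtr BuildPassThroughNetwork()
{
    armnn::INetworkPtr network = armnn::INetwork::Create();

    armnn::IConnectableLayer* input  = network->AddInputLayer(0, "input");
    armnn::IConnectableLayer* output = network->AddOutputLayer(0, "output");

    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo({1, 4}, armnn::DataType::Float32));

    return network;  // ownership travels with the INetworkPtr
}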

◆ InferenceTimingPair

Definition at line 402 of file Types.hpp.

◆ InputQueueDescriptor

Definition at line 91 of file WorkloadData.hpp.

◆ InputTensors

using InputTensors = std::vector<std::pair<LayerBindingId, class ConstTensor> >

Definition at line 394 of file Tensor.hpp.

◆ Int32Workload

◆ IOptimizedNetworkPtr

using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>

Definition at line 340 of file INetwork.hpp.

◆ IReportStructure

using IReportStructure = arm::pipe::IReportStructure

Definition at line 27 of file Runtime.hpp.

◆ IRuntimePtr

using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>

Definition at line 41 of file IRuntime.hpp.
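A minimal sketch of how an IRuntimePtr ties optimization and loading together, assuming a network built elsewhere and the CpuRef backend being present. The runtime must outlive the loaded network, so it is passed in by reference here; error handling is omitted.

#include <armnn/ArmNN.hpp>
#include <utility>

// Create the runtime once and keep it alive for the lifetime of the application.
// armnn::IRuntime::CreationOptions options;
// armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

armnn::NetworkId LoadOnCpuRef(armnn::IRuntime& runtime, const armnn::INetwork& network)
{
    // Optimize for the reference backend and load the resulting network.
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(network, {armnn::Compute::CpuRef}, runtime.GetDeviceSpec());

    armnn::NetworkId networkId = 0;
    runtime.LoadNetwork(networkId, std::move(optNet));
    return networkId;
}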

◆ LayerBindingId

using LayerBindingId = int

Type of identifiers for bindable layers (inputs, outputs).

Definition at line 309 of file Types.hpp.

◆ LayerPriority

using LayerPriority = unsigned int

Definition at line 227 of file Layer.hpp.

◆ LayerTypeOf

using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type

Definition at line 95 of file LayersFwd.hpp.

◆ LoadedNetworks

using LoadedNetworks = std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork> >

Definition at line 26 of file Runtime.hpp.

◆ LogSoftmaxDescriptor

A LogSoftmaxDescriptor for the LogSoftmaxLayer.

Definition at line 196 of file Descriptors.hpp.

◆ MemoryOptimizerStrategiesMapRef

using MemoryOptimizerStrategiesMapRef = std::unordered_map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy> >

Definition at line 33 of file BackendRegistry.hpp.

◆ MemorySourceFlags

using MemorySourceFlags = unsigned int

Definition at line 15 of file MemorySources.hpp.

◆ MergerDescriptor

MergerDescriptor is deprecated, use ConcatDescriptor instead.

Definition at line 64 of file DescriptorsFwd.hpp.

◆ MergerQueueDescriptor

Definition at line 149 of file WorkloadData.hpp.

◆ ModelOptions

using ModelOptions = std::vector<BackendOptions>

Definition at line 18 of file BackendOptions.hpp.

◆ NetworkId

typedef int NetworkId

Definition at line 35 of file IRuntime.hpp.

◆ NetworkImplPtr

using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>

Definition at line 29 of file Network.hpp.

◆ NetworkOptions

using NetworkOptions = std::vector<BackendOptions>

Definition at line 16 of file BackendOptions.hpp.

◆ OutputQueueDescriptor

Definition at line 92 of file WorkloadData.hpp.

◆ OutputTensors

using OutputTensors = std::vector<std::pair<LayerBindingId, class Tensor> >

Definition at line 395 of file Tensor.hpp.
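Both typedefs pair a LayerBindingId with a tensor. A minimal sketch of populating them for a network with one input and one output bound to id 0; shapes and buffers are illustrative only.

#include <armnn/Tensor.hpp>
#include <utility>
#include <vector>

// Sketch: wrap caller-owned buffers as input/output bindings for binding id 0.
std::pair<armnn::InputTensors, armnn::OutputTensors>
MakeIoTensors(std::vector<float>& inputData, std::vector<float>& outputData)
{
    armnn::TensorInfo inInfo({1, 4}, armnn::DataType::Float32);
    inInfo.SetConstant(true);  // ConstTensor expects a constant TensorInfo

    armnn::TensorInfo outInfo({1, 4}, armnn::DataType::Float32);

    armnn::InputTensors  inputs  { {0, armnn::ConstTensor(inInfo, inputData.data())} };
    armnn::OutputTensors outputs { {0, armnn::Tensor(outInfo, outputData.data())} };
    return {inputs, outputs};
}
// The two containers are then passed to IRuntime::EnqueueWorkload(networkId, inputs, outputs).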

◆ ParameterStringifyFunction

using ParameterStringifyFunction = std::function<void(const std::string& name, const std::string& value)>

Definition at line 14 of file SerializeLayerParameters.hpp.

◆ PreCompiledObjectDeleter

using PreCompiledObjectDeleter = std::function<void(const void*)>

Definition at line 19 of file PreCompiledLayer.hpp.

◆ PreCompiledObjectPtr

using PreCompiledObjectPtr = std::unique_ptr<void, PreCompiledObjectDeleter>

Definition at line 20 of file PreCompiledLayer.hpp.

◆ RefAdditionWorkload

◆ RefDebugBFloat16Workload

◆ RefDebugBooleanWorkload

◆ RefDebugFloat16Workload

◆ RefDebugFloat32Workload

◆ RefDebugQAsymmS8Workload

◆ RefDebugQAsymmU8Workload

◆ RefDebugQSymmS16Workload

◆ RefDebugQSymmS8Workload

◆ RefDebugSigned32Workload

◆ RefDebugSigned64Workload

◆ RefDivisionWorkload

◆ RefMaximumWorkload

◆ RefMinimumWorkload

◆ RefMultiplicationWorkload

◆ RefPermuteBFloat16Workload

◆ RefPermuteFloat16Workload

◆ RefPermuteFloat32Workload

◆ RefPermuteQAsymm8Workload

◆ RefPermuteQAsymmS8Workload

◆ RefPermuteQSymm16Workload

◆ RefSubtractionWorkload

◆ RefTransposeBFloat16Workload

◆ RefTransposeFloat16Workload

◆ RefTransposeFloat32Workload

◆ RefTransposeQAsymm8Workload

◆ RefTransposeQAsymmS8Workload

◆ RefTransposeQSymm16Workload

◆ ResolveType

using ResolveType = typename ResolveTypeImpl<DT>::Type

Definition at line 79 of file ResolveType.hpp.

◆ SplitterDescriptor

Definition at line 65 of file DescriptorsFwd.hpp.

◆ TensorInfos

using TensorInfos = std::vector<TensorInfo>

Definition at line 152 of file BackendHelper.cpp.

◆ Uint8ToFloat32Workload

◆ Uint8Workload

◆ UnidirectionalSequenceLstmDescriptor

◆ WorkloadQueue

using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >

Definition at line 13 of file ExecutionFrame.hpp.

Enumeration Type Documentation

◆ ActivationFunction

enum ActivationFunction
strong
Enumerator
Sigmoid 
TanH 
Linear 
ReLu 
BoundedReLu 

min(a, max(b, input)); covers both ReLu1 and ReLu6.

SoftReLu 
LeakyReLu 
Abs 
Sqrt 
Square 
Elu 
HardSwish 
Gelu 

Definition at line 86 of file Types.hpp.

87 {
88  Sigmoid = 0,
89  TanH = 1,
90  Linear = 2,
91  ReLu = 3,
92  BoundedReLu = 4, ///< min(a, max(b, input)) ReLu1 & ReLu6.
93  SoftReLu = 5,
94  LeakyReLu = 6,
95  Abs = 7,
96  Sqrt = 8,
97  Square = 9,
98  Elu = 10,
99  HardSwish = 11,
100  Gelu = 12
101 };

◆ ArgMinMaxFunction

enum ArgMinMaxFunction
strong
Enumerator
Min 
Max 

Definition at line 103 of file Types.hpp.

104 {
105  Min = 0,
106  Max = 1
107 };

◆ BackendCapability

enum BackendCapability : uint32_t
strong

BackendCapability class.

Enumerator
NonConstWeights 

Constant weights can be accessed through the descriptors; non-const weights, on the other hand, can be accessed through inputs.

AsyncExecution 

Asynchronous Execution.

Definition at line 286 of file Types.hpp.

286  : uint32_t
287 {
288  /// Constant weights can be accessed through the descriptors,
289  /// On the other hand, non-const weights can be accessed through inputs.
290  NonConstWeights,
291 
292  /// Asynchronous Execution.
293  AsyncExecution,
294 
295  // add new enum values here
296 };
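These capability names also appear as string/bool options in the BackendCapabilities variables listed earlier. A minimal sketch of querying one by name, assuming the HasCapability helper declared in BackendHelper.hpp:

#include <armnn/BackendHelper.hpp>
#include <iostream>

// Sketch: ask whether a backend advertises the "NonConstWeights" capability.
void CheckNonConstWeights(const armnn::BackendId& backend)
{
    if (armnn::HasCapability("NonConstWeights", backend))
    {
        std::cout << backend.Get() << " supports non-constant weights as inputs\n";
    }
}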

◆ BinaryOperation

enum BinaryOperation
strong
Enumerator
Add 
Div 
Maximum 
Minimum 
Mul 
Sub 
SqDiff 
Power 

Definition at line 138 of file Types.hpp.

139 {
140  Add = 0,
141  Div = 1,
142  Maximum = 2,
143  Minimum = 3,
144  Mul = 4,
145  Sub = 5,
146  SqDiff = 6,
147  Power = 7
148 };

◆ BoostLogSeverityMapping

Enumerator
trace 
debug 
info 
warning 
error 
fatal 

Definition at line 196 of file Logging.hpp.

197 {
198  trace,
199  debug,
200  info,
201  warning,
202  error,
203  fatal
204 };

◆ CapabilityClass

enum CapabilityClass
strong

Capability class used by the GetCapabilities function so that only the capabilities within the requested scope are chosen for calculation.

Enumerator
PaddingRequired 
FallbackImportDisabled 
CapabilityClassMax 

Definition at line 24 of file ITensorHandleFactory.hpp.

25 {
26  PaddingRequired = 1,
27  FallbackImportDisabled = 2,
28 
29  // add new enum values here
30 
31  CapabilityClassMax = 254
32 };

◆ ComparisonOperation

enum ComparisonOperation
strong
Enumerator
Equal 
Greater 
GreaterOrEqual 
Less 
LessOrEqual 
NotEqual 

Definition at line 109 of file Types.hpp.

110 {
111  Equal = 0,
112  Greater = 1,
113  GreaterOrEqual = 2,
114  Less = 3,
115  LessOrEqual = 4,
116  NotEqual = 5
117 };

◆ Compute

enum Compute
strong

The Compute enum is deprecated and is being replaced by BackendId.

Enumerator
Undefined 
CpuRef 

CPU Execution: Reference C++ kernels.

CpuAcc 

CPU Execution: NEON: ArmCompute.

GpuAcc 

GPU Execution: OpenCL: ArmCompute.

Definition at line 21 of file BackendId.hpp.

22 {
23  Undefined = 0,
24  /// CPU Execution: Reference C++ kernels
25  CpuRef = 1,
26  /// CPU Execution: NEON: ArmCompute
27  CpuAcc = 2,
28  /// GPU Execution: OpenCL: ArmCompute
29  GpuAcc = 3
30 };
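Since Compute is deprecated, new code normally passes BackendId values instead; a small sketch of the preferred string form alongside the legacy enum conversion:

#include <armnn/BackendId.hpp>
#include <vector>

// Sketch: both forms produce equivalent BackendId values for backend selection.
std::vector<armnn::BackendId> PreferredBackends()
{
    armnn::BackendId fromString{"CpuAcc"};             // preferred style
    armnn::BackendId fromEnum{armnn::Compute::CpuAcc}; // legacy enum still converts

    return { fromString, fromEnum, armnn::BackendId("CpuRef") };
}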

◆ DataLayout

enum DataLayout
strong
Enumerator
NCHW 
NHWC 
NDHWC 
NCDHW 

Definition at line 62 of file Types.hpp.

63 {
64  NCHW = 1,
65  NHWC = 2,
66  NDHWC = 3,
67  NCDHW = 4
68 };

◆ DataType

enum DataType
strong
Enumerator
Float16 
Float32 
QAsymmU8 
Signed32 
Boolean 
QSymmS16 
QSymmS8 
QAsymmS8 
BFloat16 
Signed64 

Definition at line 48 of file Types.hpp.

49 {
50  Float16 = 0,
51  Float32 = 1,
52  QAsymmU8 = 2,
53  Signed32 = 3,
54  Boolean = 4,
55  QSymmS16 = 5,
56  QSymmS8 = 6,
57  QAsymmS8 = 7,
58  BFloat16 = 8,
59  Signed64 = 9,
60 };

◆ Dimensionality

enum Dimensionality
strong
Enumerator
NotSpecified 
Specified 
Scalar 

Definition at line 172 of file Types.hpp.

173 {
174  NotSpecified = 0,
175  Specified = 1,
176  Scalar = 2
177 };

◆ EdgeStrategy

enum EdgeStrategy
strong
Enumerator
Undefined 

No strategy has been defined. Used internally to verify integrity of optimizations.

DirectCompatibility 

Destination backend can work directly with tensors on source backend.

ExportToTarget 

Source backends tensor data can be exported to destination backend tensor without copy.

CopyToTarget 

Copy contents from source backend tensor to destination backend tensor.

Definition at line 104 of file ITensorHandleFactory.hpp.

105 {
106  Undefined, /// No strategy has been defined. Used internally to verify integrity of optimizations.
107  DirectCompatibility, /// Destination backend can work directly with tensors on source backend.
108  ExportToTarget, /// Source backends tensor data can be exported to destination backend tensor without copy.
109  CopyToTarget /// Copy contents from source backend tensor to destination backend tensor.
110 };

◆ FusedKernelType

enum FusedKernelType
strong
Enumerator
AddMulAdd 

Definition at line 266 of file Types.hpp.

267 {
268  AddMulAdd = 0
269 };

◆ GraphEvent

enum GraphEvent
strong
Enumerator
LayerAdded 
LayerErased 

Definition at line 12 of file IGraphObservable.hpp.

13 {
14  LayerAdded,
15  LayerErased
16 };

◆ JsonObjectType

enum JsonObjectType
strong
Enumerator
Measurement 
Event 
ExecObjectDesc 

Definition at line 20 of file JsonPrinter.hpp.

21 {
22  Measurement,
23  Event,
24  ExecObjectDesc
25 };

◆ LayerType

enum LayerType
strong

When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.

Enumerator
Activation 
Addition 
ArgMinMax 
BatchNormalization 
BatchToSpaceNd 
Comparison 
Concat 
Constant 
ConvertFp16ToFp32 
ConvertFp32ToFp16 
Convolution2d 
Debug 
DepthToSpace 
DepthwiseConvolution2d 
Dequantize 
DetectionPostProcess 
Division 
ElementwiseUnary 
FakeQuantization 
Fill 
Floor 
FullyConnected 
Gather 
Input 
InstanceNormalization 
L2Normalization 
LogicalBinary 
LogSoftmax 
Lstm 
QLstm 
Map 
Maximum 
Mean 
MemCopy 
MemImport 
Merge 
Minimum 
Multiplication 
Normalization 
Output 
Pad 
Permute 
Pooling2d 
PreCompiled 
Prelu 
Quantize 
QuantizedLstm 
Reshape 
Rank 
Resize 
Reduce 
Slice 
Softmax 
SpaceToBatchNd 
SpaceToDepth 
Splitter 
Stack 
StandIn 
StridedSlice 
Subtraction 
Switch 
Transpose 
TransposeConvolution2d 
Unmap 
Cast 
Shape 
UnidirectionalSequenceLstm 
ChannelShuffle 
Convolution3d 
Pooling3d 
GatherNd 
BatchMatMul 
ElementwiseBinary 
ReverseV2 
Tile 
Fused 
BroadcastTo 
ScatterNd 
FirstLayer 
LastLayer 

Definition at line 491 of file Types.hpp.

492 {
493 #define X(name) name,
494  LIST_OF_LAYER_TYPE
495 #undef X
496  FirstLayer = Activation,
497  LastLayer = ScatterNd
498 };

◆ LogicalBinaryOperation

Enumerator
LogicalAnd 
LogicalOr 

Definition at line 119 of file Types.hpp.

120 {
121  LogicalAnd = 0,
122  LogicalOr = 1
123 };

◆ LogSeverity

enum LogSeverity
strong
Enumerator
Trace 
Debug 
Info 
Warning 
Error 
Fatal 

Definition at line 13 of file Utils.hpp.

14 {
15  Trace,
16  Debug,
17  Info,
18  Warning,
19  Error,
20  Fatal
21 };

◆ MemBlockStrategyType

enum MemBlockStrategyType
strong
Enumerator
SingleAxisPacking 
MultiAxisPacking 

Definition at line 253 of file Types.hpp.

254 {
255  // MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
256  // In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
257  // equivalent to blob or pooling memory management.
258  SingleAxisPacking = 0,
259 
260  // MemBlocks can be packed on either Y or X axis but cannot overlap on both.
261  // In other words MemBlocks with overlapping lifetimes can use the same MemBin,
262  // equivalent to offset or slab memory management.
263  MultiAxisPacking = 1
264 };

◆ MemorySource

enum MemorySource : uint32_t
strong

Define the Memory Source to reduce copies.

Enumerator
Undefined 
Malloc 
DmaBuf 
DmaBufProtected 
Gralloc 

Definition at line 244 of file Types.hpp.

244  : uint32_t
245 {
246  Undefined = 0,
247  Malloc = 1,
248  DmaBuf = 2,
249  DmaBufProtected = 4,
250  Gralloc = 8
251 };
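Because the enumerators are distinct bits, they can be OR-ed into a MemorySourceFlags mask (the typedef documented above). A minimal sketch; the two helper functions are local to the example:

#include <armnn/MemorySources.hpp>
#include <armnn/Types.hpp>

// Sketch: build a mask that accepts Malloc or DmaBuf imports.
armnn::MemorySourceFlags ImportableSources()
{
    return static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc) |
           static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::DmaBuf);
}

// Sketch: test whether a particular source is present in the mask.
bool Accepts(armnn::MemorySourceFlags flags, armnn::MemorySource source)
{
    return (flags & static_cast<armnn::MemorySourceFlags>(source)) != 0;
}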

◆ NormalizationAlgorithmChannel

Enumerator
Across 
Within 

Definition at line 207 of file Types.hpp.

208 {
209  Across = 0,
210  Within = 1
211 };

◆ NormalizationAlgorithmMethod

Enumerator
LocalBrightness 

Krichevsky 2012: Local Brightness Normalization.

LocalContrast 

Jarret 2009: Local Contrast Normalization.

Definition at line 213 of file Types.hpp.

214 {
215  /// Krichevsky 2012: Local Brightness Normalization
216  LocalBrightness = 0,
217  /// Jarret 2009: Local Contrast Normalization
218  LocalContrast = 1
219 };

◆ OutputShapeRounding

enum OutputShapeRounding
strong
Enumerator
Floor 
Ceiling 

Definition at line 221 of file Types.hpp.

222 {
223  Floor = 0,
224  Ceiling = 1
225 };

◆ PaddingMethod

enum PaddingMethod
strong

The padding method modifies the output of pooling layers.

In both supported methods, the values are ignored (they are not even zeroes, which would make a difference for max pooling a tensor with negative values). The difference between IgnoreValue and Exclude is that the former counts the padding fields in the divisor of Average and L2 pooling, while Exclude does not.

Enumerator
IgnoreValue 

The padding fields count, but are ignored.

Exclude 

The padding fields don't count and are ignored.

Definition at line 188 of file Types.hpp.

189 {
190  /// The padding fields count, but are ignored
191  IgnoreValue = 0,
192  /// The padding fields don't count and are ignored
193  Exclude = 1
194 };
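As a concrete illustration of the divisor difference (values chosen for illustration only): take an Average pooling window of size 2x2 in which the right column is padding and the two real values are 4 and 8. With IgnoreValue the padded fields count in the divisor, giving (4 + 8) / 4 = 3; with Exclude only the real fields count, giving (4 + 8) / 2 = 6.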

◆ PaddingMode

enum PaddingMode
strong

The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).

Enumerator
Constant 
Reflect 
Symmetric 

Definition at line 200 of file Types.hpp.

201 {
202  Constant = 0,
203  Reflect = 1,
204  Symmetric = 2
205 };
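As a quick worked example (values chosen for illustration), padding the 1-D row 1 2 3 with two elements on the left gives:

  Constant (pad value 0): 0 0 1 2 3 — filled with a constant value
  Reflect:                3 2 1 2 3 — mirrored without repeating the border value
  Symmetric:              2 1 1 2 3 — mirrored including the border value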

◆ PoolingAlgorithm

enum PoolingAlgorithm
strong
Enumerator
Max 
Average 
L2 

Definition at line 150 of file Types.hpp.

151 {
152  Max = 0,
153  Average = 1,
154  L2 = 2
155 };

◆ ProfilingDetailsMethod

Define the behaviour of the internal profiler when outputting network details.

Enumerator
Undefined 
DetailsWithEvents 
DetailsOnly 

Definition at line 71 of file Types.hpp.

72 {
73  Undefined = 0,
74  DetailsWithEvents = 1,
75  DetailsOnly = 2
76 };

◆ QosExecPriority

enum QosExecPriority
strong
Enumerator
Low 
Medium 
High 

Definition at line 79 of file Types.hpp.

80 {
81  Low = 0,
82  Medium = 1,
83  High = 2
84 };

◆ ReduceOperation

enum ReduceOperation
strong
Enumerator
Sum 
Max 
Mean 
Min 
Prod 

Definition at line 157 of file Types.hpp.

158 {
159  Sum = 0,
160  Max = 1,
161  Mean = 2,
162  Min = 3,
163  Prod = 4
164 };

◆ ResizeMethod

enum ResizeMethod
strong
Enumerator
Bilinear 
NearestNeighbor 

Definition at line 166 of file Types.hpp.

167 {
168  Bilinear = 0,
169  NearestNeighbor = 1
170 };

◆ ScatterNdFunction

enum ScatterNdFunction
strong
Enumerator
Update 
Add 
Sub 
Max 
Min 
Mul 

Definition at line 500 of file Types.hpp.

501 {
502  Update = 0,
503  Add = 1,
504  Sub = 2,
505  Max = 3,
506  Min = 4,
507  Mul = 5
508 };

◆ ShapeInferenceMethod

enum ShapeInferenceMethod
strong

The ShapeInferenceMethod modifies how the output shapes are treated.

When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer and any mismatch is reported. When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or dimension sizes are not specified explicitly but can be calculated from the inputs.

Enumerator
ValidateOnly 

Validate all output shapes.

InferAndValidate 

Infer missing output shapes and validate all output shapes.

Definition at line 235 of file Types.hpp.

236 {
237  /// Validate all output shapes
238  ValidateOnly = 0,
239  /// Infer missing output shapes and validate all output shapes
240  InferAndValidate = 1
241 };

◆ Status

enum Status
strong

enumeration

Enumerator
Success 
Failure 

Definition at line 42 of file Types.hpp.

43 {
44  Success = 0,
45  Failure = 1
46 };

◆ TuningLevel

enum TuningLevel
strong
Enumerator
None 
Rapid 
Normal 
Exhaustive 

Definition at line 18 of file ArmComputeTuningUtils.hpp.

19 {
20  None,
21  Rapid,
22  Normal,
23  Exhaustive
24 };

◆ UnaryOperation

enum UnaryOperation
strong
Enumerator
Abs 
Exp 
Sqrt 
Rsqrt 
Neg 
LogicalNot 
Log 
Sin 
Ceil 

Definition at line 125 of file Types.hpp.

126 {
127  Abs = 0,
128  Exp = 1,
129  Sqrt = 2,
130  Rsqrt = 3,
131  Neg = 4,
132  LogicalNot = 5,
133  Log = 6,
134  Sin = 7,
135  Ceil = 8
136 };

Function Documentation

◆ Activation() [1/2]

void Activation ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo tensorInfo,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 102 of file Activation.cpp.

108 {
109  unsigned int numElements = tensorInfo.GetNumElements();
110 
111  for (unsigned int i = 0; i < numElements; i++)
112  {
113  out.Set(Activation(in.Get(), function, a, b));
114  ++in;
115  ++out;
116  }
117  in -= numElements;
118  out -= numElements;
119 }

References Activation(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().
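A minimal sketch of driving this overload with the MakeDecoder/MakeEncoder helpers declared earlier in this namespace. The header names are indicative of the reference workloads and may differ in your tree; buffer contents are arbitrary.

#include <armnn/Tensor.hpp>
#include <vector>
// Reference backend internal headers; paths indicative only.
#include "Decoders.hpp"
#include "Encoders.hpp"
#include "Activation.hpp"

void RunSigmoid()
{
    armnn::TensorInfo info({1, 4}, armnn::DataType::Float32);
    std::vector<float> in  = { -1.0f, 0.0f, 1.0f, 2.0f };
    std::vector<float> out(4, 0.0f);

    auto decoder = armnn::MakeDecoder<float>(info, in.data());
    auto encoder = armnn::MakeEncoder<float>(info, out.data());

    // Applies the sigmoid element-wise; a and b are unused for Sigmoid.
    armnn::Activation(*decoder, *encoder, info,
                      armnn::ActivationFunction::Sigmoid, 0.0f, 0.0f);
}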

◆ Activation() [2/2]

float Activation ( float  in,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 13 of file Activation.cpp.

17 {
18  float output;
19 
20  // Compute the result of the activation function.
21  switch (function)
22  {
23  case ActivationFunction::Linear:
24  {
25  output = a * in + b;
26  break;
27  }
28  case ActivationFunction::Sigmoid:
29  {
30  output = 1.f / (1.f + expf(-in));
31  break;
32  }
33  case ActivationFunction::ReLu:
34  {
35  output = std::max(0.f, in);
36  break;
37  }
38  case ActivationFunction::BoundedReLu:
39  {
40  output = std::min(a, std::max(b, in));
41  break;
42  }
43  case ActivationFunction::SoftReLu:
44  {
45  output = logf(1.0f + expf(in));
46  break;
47  }
48  case ActivationFunction::LeakyReLu:
49  {
50  output = in > 0.0f ? in : (in * a);
51  break;
52  }
53  case ActivationFunction::Abs:
54  {
55  output = in < 0 ? -in : in;
56  break;
57  }
58  case ActivationFunction::Sqrt:
59  {
60  output = sqrtf(in);
61  break;
62  }
63  case ActivationFunction::Square:
64  {
65  output = in * in;
66  break;
67  }
68  case ActivationFunction::TanH:
69  {
70  output = a * tanhf(b * in);
71  break;
72  }
73  case ActivationFunction::Elu:
74  {
75  output = (in >= 0) ? in : a * (expf(in) - 1);
76  break;
77  }
78  case ActivationFunction::HardSwish:
79  {
80  // hard_swish(x) = x * relu6(x+3) / 6
81  // relu6(x) = min(max(x,0),6)
82  output = in * (std::min(std::max((in + 3),0.0f),6.0f)) / 6;
83  break;
84  }
85  case ActivationFunction::Gelu:
86  {
87  // gelu(x) = x * 1/2 * (1 + erf(x / sqrt(2))),
88  // where erf is Gaussian error function
89  output = in * (0.5f * (1.0f + erff(static_cast<float>(in / std::sqrt(2)))));
90  break;
91  }
92  default:
93  {
94  throw InvalidArgumentException("Unsupported activation function");
95  }
96  }
97 
98  return output;
99 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by Activation(), and LstmImpl().
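For example, a ReLu6-style clamp built from the scalar overload (a minimal sketch; the Activation.hpp path is indicative of the reference workloads):

#include <armnn/Types.hpp>
// Internal header of the reference workloads; path indicative only.
#include "Activation.hpp"

float Relu6(float x)
{
    // BoundedReLu computes min(a, max(b, x)); with a = 6 and b = 0 this is ReLu6.
    return armnn::Activation(x, armnn::ActivationFunction::BoundedReLu, 6.0f, 0.0f);
}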

◆ AllocateOutputData()

void armnn::AllocateOutputData ( unsigned int  numOutput,
unsigned int  numSelected,
const std::vector< float > &  boxCorners,
const std::vector< unsigned int > &  outputIndices,
const std::vector< unsigned int > &  selectedBoxes,
const std::vector< unsigned int > &  selectedClasses,
const std::vector< float > &  selectedScores,
float *  detectionBoxes,
float *  detectionScores,
float *  detectionClasses,
float *  numDetections 
)

Definition at line 101 of file DetectionPostProcess.cpp.

112 {
113  for (unsigned int i = 0; i < numOutput; ++i)
114  {
115  unsigned int boxIndex = i * 4;
116  if (i < numSelected)
117  {
118  unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
119  detectionScores[i] = selectedScores[outputIndices[i]];
120  detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
121  detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
122  detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
123  detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
124  detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
125  }
126  else
127  {
128  detectionScores[i] = 0.0f;
129  detectionClasses[i] = 0.0f;
130  detectionBoxes[boxIndex] = 0.0f;
131  detectionBoxes[boxIndex + 1] = 0.0f;
132  detectionBoxes[boxIndex + 2] = 0.0f;
133  detectionBoxes[boxIndex + 3] = 0.0f;
134  }
135  }
136  numDetections[0] = armnn::numeric_cast<float>(numSelected);
137 }

Referenced by DetectionPostProcess().

◆ AllTypesAreEqualImpl() [1/2]

bool armnn::AllTypesAreEqualImpl ( t1,
t2,
Rest...  rest 
)

Definition at line 64 of file LayerSupportRules.hpp.

65 {
66  static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
67 
68  return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
69 }

References AllTypesAreEqualImpl().

◆ AllTypesAreEqualImpl() [2/2]

bool armnn::AllTypesAreEqualImpl ( )

Definition at line 58 of file LayerSupportRules.hpp.

59 {
60  return true;
61 }

Referenced by AllTypesAreEqualImpl(), and TypesAreEqual::TypesAreEqual().

◆ Append() [1/2]

void armnn::Append ( Optimizer::Optimizations optimizations,
Front &&  front,
Others &&...  others 
)

Definition at line 36 of file Optimizer.hpp.

37 {
38  Append<Front>(optimizations, std::forward<Front>(front));
39  Append<Others...>(optimizations, std::forward<Others>(others)...);
40 };

References Append().

◆ Append() [2/2]

void armnn::Append ( Optimizer::Optimizations optimizations,
T &&  optimization 
)

Definition at line 30 of file Optimizer.hpp.

31 {
32  optimizations.emplace_back(new T(optimization));
33 };

Referenced by Append(), and MakeOptimizations().
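Append is usually reached indirectly through MakeOptimizations. A minimal sketch of the common pattern, using two existing optimizations as examples; the header paths are indicative of the internal source layout:

#include "Optimizer.hpp"            // internal headers; paths indicative only
#include "optimizations/All.hpp"

void RunExampleOptimizations(armnn::Graph& graph)
{
    // MakeOptimizations forwards each optimization object to Append, which
    // stores it in the Optimizer::Optimizations container passed to Pass.
    armnn::Optimizer::Pass(graph,
        armnn::MakeOptimizations(armnn::optimizations::SquashEqualPermuteSiblings(),
                                 armnn::optimizations::OptimizeConsecutiveReshapes()));
}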

◆ ApplyBackendOptimizations()

OptimizationResult armnn::ApplyBackendOptimizations ( OptimizedNetworkImpl optNetObjPtr,
BackendSettings backendSettings,
BackendsMap backends,
const ModelOptions modelOptions,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1320 of file Network.cpp.

1325 {
1326  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
1327  OptimizationResult result;
1328 
1329  // Get the optimized graph
1330  Graph& optGraph = optNetObjPtr->GetGraph();
1331 
1332  // Run backend specific optimizations
1333  for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
1334  {
1335  auto backendObjPtr = backends.find(selectedBackend)->second.get();
1336  if (!backendObjPtr)
1337  {
1338  throw armnn::NullPointerException("backendObjPtr must not be null.");
1339  }
1340 
1341  if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
1342  {
1344  Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
1345  }
1346 
1347  // Select sub-graphs based on backend
1348  SubgraphViewSelector::Subgraphs subgraphs =
1349  SubgraphViewSelector::SelectSubgraphs(optGraph,
1350  // Select layers assigned to the requested backend
1351  [&backendObjPtr](const Layer& layer)
1352  {
1353 
1354  return layer.GetType() != LayerType::Input &&
1355  layer.GetType() != LayerType::Output &&
1356  layer.GetBackendId() == backendObjPtr->GetId();
1357  });
1358  if (subgraphs.empty())
1359  {
1360  // No sub-graphs found, try with next selected backend
1361  continue;
1362  }
1363 
1364  // Try to optimize each sub-graph
1365  for (auto& subgraph : subgraphs)
1366  {
1367  // Try to optimize the current sub-graph
1368  ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
1369  OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
1370  if (!optimizationViews.Validate(*subgraph))
1371  {
1372  throw armnn::Exception("optimizationViews must have a valid subgraph.");
1373  }
1374 
1375  // Optimization attempted, check the resulting optimized sub-graph
1376  for (auto& substitution : optimizationViews.GetSubstitutions())
1377  {
1378  // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
1379  SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
1380  SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
1381  optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
1382 
1383  // Assign the current backend to the optimized sub-graph
1384  const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
1385  std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
1386  {
1387  PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
1388  });
1389  }
1390 
1391  // Remove deleted sub-graphs
1392  for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
1393  {
1394  for (auto& l : deletedSubgraph.GetIConnectableLayers())
1395  {
1396  Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
1397  for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
1398  {
1399  auto inputSlot = deletedLayer->GetInputSlot(in -1);
1400  OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
1401  parentOut->Disconnect(inputSlot);
1402  for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out)
1403  {
1404  InputSlot* childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1);
1405  deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn);
1406  parentOut->Connect(*childIn);
1407  }
1408  }
1409  optGraph.EraseLayer(deletedLayer);
1410  }
1411  }
1412 
1413  if (!optimizationViews.GetFailedSubgraphs().empty())
1414  {
1415  std::stringstream warningMsg;
1416  warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
1417  ReportWarning(warningMsg.str(), errMessages);
1418 
1419  // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
1420  BackendSettings settingsCopy(backendSettings);
1421  if (!backendObjPtr->GetId().IsCpuRef())
1422  {
1423  // Add the current backend to the list of backends to ignore
1424  settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
1425  }
1426 
1427  int count=0;
1428  for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
1429  {
1430  // An error occurred: the optimization was attempted but not performed, try different backends
1431  std::stringstream subgraphMsg;
1432  subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
1433  << " layers inside sub-graph " << count++;
1434  ReportWarning(subgraphMsg.str(), errMessages);
1435 
1436  OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
1437  settingsCopy,
1438  *subgraph,
1439  errMessages);
1440  if (reassignmentResult.m_Error)
1441  {
1442  // Failed to re-assign one of the remaining backends to each layer of the sub-graph
1443  result.m_Error = true;
1444  return result;
1445  }
1446  }
1447  }
1448  }
1449  }
1450 
1451  return result;
1452 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), OutputSlot::Connect(), CpuAcc, OutputSlot::Disconnect(), Graph::EraseLayer(), Layer::GetBackendId(), OutputSlot::GetConnection(), OptimizationViews::GetDeletedSubgraphs(), OptimizationViews::GetFailedSubgraphs(), OptimizedNetworkImpl::GetGraph(), SubgraphView::GetIConnectableLayers(), Layer::GetInputSlot(), OutputSlot::GetNumConnections(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OptimizationViews::GetSubstitutions(), Layer::GetType(), GpuAcc, Input, OptimizationResult::m_Error, BackendSettings::m_IgnoredBackends, BackendSettings::m_SelectedBackends, MakeOptimizations(), Output, Optimizer::Pass(), ReportWarning(), SubgraphViewSelector::SelectSubgraphs(), Graph::SubstituteSubgraph(), Undefined, and OptimizationViews::Validate().

Referenced by Optimize().

◆ ArgMinMax() [1/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int32_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [2/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int64_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [3/3]

void ArgMinMax ( Decoder< float > &  in,
OUT *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

Definition at line 17 of file ArgMinMax.cpp.

19 {
20  IgnoreUnused(outputTensorInfo);
21 
22  unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
23 
24  const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
25  const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
26  const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
27  uAxis + 1,
28  inputTensorInfo.GetNumDimensions());
29 
30  for (unsigned int outer = 0; outer < outerElements; ++outer) {
31  for (unsigned int inner = 0; inner < innerElements; ++inner) {
32  in[outer * axisSize * innerElements + inner];
33  auto tmpValue = in.Get();
34  unsigned int tmpIndex = 0;
35  for (unsigned int i = 1; i < axisSize; ++i) {
36  in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
37  const auto& value = in.Get();
38  if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
39  (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) {
40  tmpValue = value;
41  tmpIndex = i;
42  }
43  }
44 
45  out[outer * innerElements + inner] = armnn::numeric_cast<OUT>(tmpIndex);
46  }
47  }
48 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), armnnUtils::GetUnsignedAxis(), IgnoreUnused(), Max, and Min.
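
The in[...] expressions in the loop position the Decoder before each Get() call. The index arithmetic can be illustrated with plain buffers; the following standalone sketch (author's illustration, not part of armnn) computes an arg-max over the same outer/axis/inner decomposition:

#include <cstddef>
#include <vector>

// Author's sketch: finds the arg-max index along the reduction axis for every
// (outer, inner) pair, mirroring the loop structure of armnn::ArgMinMax above.
std::vector<std::size_t> NaiveArgMax(const std::vector<float>& data,
                                     std::size_t outerElements,
                                     std::size_t axisSize,
                                     std::size_t innerElements)
{
    std::vector<std::size_t> result(outerElements * innerElements, 0);
    for (std::size_t outer = 0; outer < outerElements; ++outer)
    {
        for (std::size_t inner = 0; inner < innerElements; ++inner)
        {
            std::size_t bestIndex = 0;
            float bestValue = data[outer * axisSize * innerElements + inner];
            for (std::size_t i = 1; i < axisSize; ++i)
            {
                const float value = data[(outer * axisSize * innerElements) + (i * innerElements) + inner];
                if (value > bestValue)
                {
                    bestValue = value;
                    bestIndex = i;
                }
            }
            result[outer * innerElements + inner] = bestIndex;
        }
    }
    return result;
}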

◆ AssertNumberOfInputSlots()

void armnn::AssertNumberOfInputSlots ( Layer &  layer)

Definition at line 28 of file Layer.cpp.

29 {
30  switch (layer.GetType())
31  {
32  case LayerType::Convolution2d:
33  case LayerType::DepthwiseConvolution2d:
 34  case LayerType::FullyConnected:
 35  {
36  if (layer.GetNumInputSlots() != 2 && layer.GetNumInputSlots() != 3)
37  {
38  throw armnn::Exception("layer must have either 2 or 3 input slots.");
39  }
40  break;
41  }
42  default:
43  {
44  if (layer.GetNumInputSlots() != 1)
45  {
46  throw armnn::Exception("layer must have one input slot.");
47  }
48  break;
49  }
50  }
51 }

References Convolution2d, DepthwiseConvolution2d, FullyConnected, Layer::GetNumInputSlots(), and Layer::GetType().

Referenced by InputSlot::Insert().

◆ AssignBackends() [1/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
Graph::Iterator &  firstLayer,
Graph::Iterator &  lastLayer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1179 of file Network.cpp.

1184 {
1185  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1186  OptimizationResult result;
1187 
1188  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1189  if (availablePreferredBackends.empty())
1190  {
1191  std::stringstream failureMsg;
1192  failureMsg << "No preferred backends are available";
1193  ReportError(failureMsg.str(), errMessages);
1194 
1195  result.m_Error = true;
1196  return result;
1197  }
1198 
1199  for (auto it = firstLayer; it != lastLayer; ++it)
1200  {
1201  auto layer = PolymorphicDowncast<Layer*>(*it);
1202  std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1203 
1204  // In AttemptBackendAssignment() we check:
1205  // - if input/output datatypes of the layer are float16
1206  // - if the layer is supported with these datatypes
1207  // If the layer is not supported (failing on ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED() in clframework),
 1208  // we attempt to insert conversion layers on either side of the new fp32 layer.
1209  bool isFloat16 = false;
1210  for (auto type : inOutDataType)
1211  {
1212  if (type == DataType::Float16)
1213  {
1214  isFloat16 = true;
1215  break;
1216  }
1217  }
1218 
1219  if (layer->GetBackendId() == "Unknown" || isFloat16)
1220  {
1221  AssignBackendsIConnectable(optNetObjPtr,
1222  *it,
1223  errMessages,
1224  result,
1225  backendSettings,
1226  availablePreferredBackends);
1227  }
1228  }
1229 
1230  for (auto it = firstLayer; it != lastLayer; ++it)
1231  {
1232  auto layer = PolymorphicDowncast<Layer*>(*it);
1233 
1234  if(layer->GetType() == LayerType::Input)
1235  {
1236  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1237  layer->SetBackendId(connectedBackendId);
1238  }
1239  }
1240 
1241  return result;
1242 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), Float16, BackendSettings::GetAvailablePreferredBackends(), GetLayerInOutDatatype(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

Referenced by ApplyBackendOptimizations(), and AssignBackends().

◆ AssignBackends() [2/3]

OptimizationResult armnn::AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView &  subgraph,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1288 of file Network.cpp.

1292 {
1293  SubgraphView::IConnectableLayerIterator firstLayer = subgraph.begin();
1294  SubgraphView::IConnectableLayerIterator lastLayer = subgraph.end();
1295  return AssignBackends(optNetObjPtr,
1296  backendSettings,
1297  firstLayer,
1298  lastLayer,
1299  errMessages);
1300 }

References AssignBackends(), SubgraphView::begin(), and SubgraphView::end().

Referenced by Optimize().

◆ AssignBackends() [3/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView::IConnectableLayerIterator &  firstLayer,
SubgraphView::IConnectableLayerIterator &  lastLayer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1244 of file Network.cpp.

1249 {
1250  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1251  OptimizationResult result;
1252 
1253  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1254  if (availablePreferredBackends.empty())
1255  {
1256  std::stringstream failureMsg;
1257  failureMsg << "No preferred backends are available";
1258  ReportError(failureMsg.str(), errMessages);
1259 
1260  result.m_Error = true;
1261  return result;
1262  }
1263 
1264  for (auto it = firstLayer; it != lastLayer; ++it)
1265  {
1266  AssignBackendsIConnectable(optNetObjPtr,
1267  *it,
1268  errMessages,
1269  result,
1270  backendSettings,
1271  availablePreferredBackends);
1272  }
1273 
1274  for (auto it = firstLayer; it != lastLayer; ++it)
1275  {
1276  auto layer = PolymorphicDowncast<Layer*>(*it);
1277 
1278  if(layer->GetType() == LayerType::Input)
1279  {
1280  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1281  layer->SetBackendId(connectedBackendId);
1282  }
1283  }
1284 
1285  return result;
1286 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.
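
These AssignBackends() overloads are internal to the optimizer; applications normally trigger backend assignment by passing a backend preference list to armnn::Optimize(). A minimal sketch of that call is shown below (the function name OptimizeForCpuAcc and the assumption that the network was built elsewhere with INetwork::Create() are the author's):

#include <armnn/ArmNN.hpp>

#include <vector>

// Author's sketch: CpuRef is listed last so that layers CpuAcc cannot handle
// are re-assigned to the reference backend during backend assignment.
armnn::IOptimizedNetworkPtr OptimizeForCpuAcc(const armnn::INetwork& network,
                                              const armnn::IRuntimePtr& runtime)
{
    std::vector<armnn::BackendId> preferredBackends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
    return armnn::Optimize(network, preferredBackends, runtime->GetDeviceSpec());
}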

◆ AssignBackendsIConnectable()

void armnn::AssignBackendsIConnectable ( OptimizedNetworkImpl *  optNetObjPtr,
IConnectableLayer *  it,
Optional< std::vector< std::string > & >  errMessages,
OptimizationResult &  result,
BackendSettings &  backendSettings,
std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 1073 of file Network.cpp.

1079 {
1080  auto ReturnError = [&](const Layer* layer)
1081  {
1082  return ReturnWithError(result, layer, backendSettings, errMessages);
1083  };
1084 
1085  auto layer = PolymorphicDowncast<Layer*>(it);
1086 
1087  if (layer->GetType() == LayerType::Input)
1088  {
1089  return;
1090  }
1091 
1092  std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1093 
1094  std::string reasonIfUnsupported;
1095  bool found = false;
1096  if (!CheckScaleSetOnQuantizedType(layer, errMessages))
1097  {
1098  // don't bomb immediately, find all the quantized outputs
1099  // which haven't had a scale set and report them all back.
1100  result.m_Error = true;
1101  }
1102 
 1103  // First try to assign the layer to the hint backend
1104  if (layer->GetBackendHint().has_value() &&
1105  backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
1106  AttemptBackendAssignment(backendSettings,
1107  optNetObjPtr->GetGraph(),
1108  layer,
1109  layer->GetBackendHint().value(),
1110  inOutDataType[0],
1111  inOutDataType[1],
1112  availablePreferredBackends,
1113  reasonIfUnsupported,
1114  errMessages).IsOk())
1115  {
1116  found = true;
1117  backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
1118  }
1119  else
1120  {
 1121  // Try to assign the layer to the preferred list of backends
1122  for (const auto& backend : availablePreferredBackends)
1123  {
1124  if (layer->GetBackendHint().has_value() &&
1125  layer->GetBackendHint().value() == backend)
1126  {
1127  continue; //Don't re-test the backend hint
1128  }
1129 
1130  OptimizationResult res = AttemptBackendAssignment(backendSettings,
1131  optNetObjPtr->GetGraph(),
1132  layer,
1133  backend,
1134  inOutDataType[0],
1135  inOutDataType[1],
1136  availablePreferredBackends,
1137  reasonIfUnsupported,
1138  errMessages);
1139 
1140  if (res.IsOk())
1141  {
1142  found = true;
1143  backendSettings.m_SelectedBackends.insert(backend);
1144  break;
1145  }
1146  else if (res.IsError())
1147  {
1148  result = res; // Cannot continue.
1149  // Note: we don't need to log the error as it would already
1150  // be logged in AttemptBackendAssignment().
1151  }
1152  }
1153  }
1154 
1155  // If the layer is unsupported by any devices, log and return a null network.
1156  if (!found)
1157  {
1158  // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
1159  // fallback we should set the compute device on the layer to CpuRef (these are not
1160  // available as accelerated operations, or are only available under certain
1161  // conditions, currently they comprise MemCopy, Constant, Permute)
1162  armnn::LayerType layerType = layer->GetType();
1163  if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
1164  layerType == armnn::LayerType::Constant ||
1165  layerType == armnn::LayerType::Permute))
1166  {
1167  BackendId cpuBackendId(armnn::Compute::CpuRef);
1168  layer->SetBackendId(cpuBackendId);
1169  backendSettings.m_SelectedBackends.insert(cpuBackendId);
1170  }
1171  else
1172  {
1173  result = ReturnError(layer);
1174  }
1175  }
1176 
1177 }

References AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Constant, CpuRef, OptimizedNetworkImpl::GetGraph(), GetLayerInOutDatatype(), Input, BackendSettings::IsBackendSupported(), BackendSettings::IsCpuRefUsed(), OptimizationResult::IsError(), OptimizationResult::IsOk(), OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MemCopy, Permute, and ReturnWithError().

Referenced by AssignBackends().

◆ AssignSplitId()

void armnn::AssignSplitId ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 322 of file SubgraphViewSelector.cpp.

323 {
324  // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned.
325  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
326  {
327  // We can only attach ourselves to the subgraph from this input if there isn't a cut here.
328  if (layerInfo.m_IsSelected == parentInfo.m_IsSelected)
329  {
330  // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs.
331  // This will be the case if the subgraph that we will become part of is already a dependency
332  // of one of the subgraphs that are input to this layer, e.g:
333  //
334  // 0 | The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X.
335  // / \ |
336  // 1 0 | We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0.
337  // \ / | We can however merge X into subgraph 1.
338  // X |
339  //
340  bool dependenciesOk = true;
341  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo)
342  {
343  // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer.
344  // Hence it is important that this is efficient - see PartialSubgraph class description.
345  if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get()))
346  {
347  dependenciesOk = false;
348  }
349  });
350 
351  if (dependenciesOk)
352  {
353  // Merge into the subgraph of this input. If we have already been merged into another subgraph
354  // (from another input of this layer), then merge both of them together.
355  if (layerInfo.m_Subgraph == nullptr)
356  {
357  layerInfo.m_Subgraph = parentInfo.m_Subgraph;
358  }
359  else
360  {
361  // We call MergeWith() ~ n times, where n is the number of inputs to this layer.
362  // Therefore it does not need to be as performant as HasAntecedent().
363  layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get());
364  }
365  }
366  }
367  });
368 
369  // If we weren't able to merge into an existing subgraph then we need to make a new one
370  if (layerInfo.m_Subgraph == nullptr)
371  {
372  layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>();
373  }
374 
375  // Record dependencies of the chosen subgraph based on the inputs of this layer.
376  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
377  {
378  // These functions are called ~n times, where n is the number of inputs to this layer.
379  // Therefore it does not need to be as performant as HasAntecedent().
380  if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get()))
381  {
382  layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get());
383  }
384  });
385 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ AttemptBackendAssignment()

OptimizationResult armnn::AttemptBackendAssignment ( BackendSettings &  backendSettings,
Graph &  graph,
Layer *  layer,
BackendId  backend,
DataType  dataTypeIn,
DataType  dataTypeOut,
const std::vector< BackendId > &  availablePreferredBackends,
std::string &  reasonIfUnsupported,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 844 of file Network.cpp.

853 {
854  OptimizationResult result;
855 
856  // Helper lambda to compose meaningful error message before returning with error
857  auto ReturnError = [&](const Layer* layer)
858  {
859  return ReturnWithError(result, layer, backendSettings, errMessages);
860  };
861 
862  // need to set the compute device on the layer
863  // before we can check if it is supported
864  layer->SetBackendId(backend);
865  std::string currentReasonIfUnsupported;
866 
867  // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
868  // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
869  // to be FP32 and inserting convert layers around the FP32 operator.
870  bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
871  reasonIfUnsupported += currentReasonIfUnsupported;
872  // This string matches the error message that is produced by acl when attempting to run FP16 kernels on
873  // a cpu or build that does not have fp16 support. We use this to check if we should add
874  // conversion layers or not.
875  std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
876  if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
877  {
878  if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
879  {
880  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
881  && layer->GetType() != LayerType::ConvertFp32ToFp16
882  && layer->GetType() != LayerType::ConvertFp16ToFp32)
883  {
884  auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
885  {
886  if (layer.GetType() == LayerType::Constant)
887  {
888  ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
889 
890  auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
891 
892  if (info.GetDataType() == DataType::Float16)
893  {
894  std::vector<float> newValues(info.GetNumElements());
895 
 896  armnnUtils::FloatingPointConverter::ConvertFloat16To32(
 897  constantLayer->m_LayerOutput->GetConstTensor<Half>(),
898  info.GetNumElements(),
899  newValues.data());
900 
901  TensorInfo newInfo(info);
902  newInfo.SetDataType(DataType::Float32);
903  ConstTensor newInput(newInfo, newValues);
904  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
905 
906  layer.GetOutputSlot(0).SetTensorInfo(newInfo);
907  }
908  }
909  };
910 
911  bool checkType = false;
912 
913  for (auto inputSlot : layer->GetInputSlots())
914  {
915  auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
916  if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
917  {
918  if (connectedOutputSlot->GetNumConnections() == 1)
919  {
920  checkType = true;
921  ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
922  }
923  }
924  }
925 
926  // Insert FP16 -> FP32 conversion layer before current layer
927  std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
928  if (dataTypeIn == DataType::Float16)
929  {
930  convertFp16ToFp32Layers =
931  InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
932  }
933 
934  // Insert FP32 -> FP16 conversion layer after current layer
935  std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
936  if (dataTypeOut == DataType::Float16)
937  {
938  convertFp32ToFp16Layers =
939  InsertConvertFp32ToFp16LayersAfter(graph, *layer);
940  }
941 
942  // Assign a supported backend to the newly introduced conversion layers
943  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
944  {
945  bool supportedBackendFound = false;
946  std::string reasonIfUnsupported;
947 
948  // Try preferred backend first
949  layer->SetBackendId(preferredBackend);
 950  if (IWorkloadFactory::IsLayerSupported(*layer,
 951  EmptyOptional(),
952  reasonIfUnsupported))
953  {
954  supportedBackendFound = true;
955  }
956  else
957  {
958  for (const auto& backend : availablePreferredBackends)
959  {
960  // Skip preferred backend (we already determined that it is not supported)
961  if (backend == preferredBackend)
962  {
963  continue;
964  }
965 
966  layer->SetBackendId(backend);
 967  if (IWorkloadFactory::IsLayerSupported(*layer,
 968  EmptyOptional(),
969  reasonIfUnsupported))
970  {
971  supportedBackendFound = true;
972  break;
973  }
974  }
975  }
976 
977  return supportedBackendFound;
978  };
979 
980  for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
981  {
982  if (!AssignFirstSupportedBackend(convertLayer, backend))
983  {
984  return ReturnError(convertLayer);
985  }
986  }
987 
988  for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
989  {
990  if (!AssignFirstSupportedBackend(convertLayer, backend))
991  {
992  return ReturnError(convertLayer);
993  }
994  }
995 
996  return result;
997  }
998  }
999 
1000  std::stringstream warningMsg;
1001  warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
1002  << " is not supported on requested backend " << layer->GetBackendId().Get()
1003  << " for input data type " << GetDataTypeName(dataTypeIn)
1004  << " and output data type " << GetDataTypeName(dataTypeOut)
1005  << " (reason: " << reasonIfUnsupported
1006  << "), falling back to the next backend.";
1007  ReportWarning(warningMsg.str(), errMessages);
1008 
1009  return OptimizationResult(true, false);
1010  }
1011  else
1012  {
1013  return result;
1014  }
1015 }

References Constant, FloatingPointConverter::ConvertFloat16To32(), ConvertFp16ToFp32, ConvertFp32ToFp16, Float16, Float32, BackendId::Get(), Layer::GetBackendId(), GetDataTypeName(), Layer::GetInputSlots(), GetLayerTypeAsCString(), Layer::GetOutputSlot(), Layer::GetType(), info, InsertConvertFp16ToFp32LayersBefore(), InsertConvertFp32ToFp16LayersAfter(), IWorkloadFactory::IsLayerSupported(), ConstantLayer::m_LayerOutput, ReportWarning(), ReturnWithError(), Layer::SetBackendId(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AssignBackendsIConnectable().

◆ BackendRegistryInstance()

◆ BatchNormImpl()

void BatchNormImpl ( const BatchNormalizationQueueDescriptor &  data,
Decoder< float > &  meanDecoder,
Decoder< float > &  varianceDecoder,
Decoder< float > &  betaDecoder,
Decoder< float > &  gammaDecoder,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file BatchNormImpl.cpp.

25 {
26  const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
27  const TensorShape inputShape = inputInfo.GetShape();
28 
29  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
30 
31  unsigned int inputBatches = inputShape[0];
32  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
33  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
34  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
35 
36  for (unsigned int c = 0; c < inputChannels; c++)
37  {
38  meanDecoder[c];
39  varianceDecoder[c];
40  betaDecoder[c];
41  gammaDecoder[c];
42  float mean = meanDecoder.Get();
43  float var = varianceDecoder.Get();
44  float beta = betaDecoder.Get();
45  float gamma = gammaDecoder.Get();
46 
47  float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
48  float add = beta - mult * mean;
49 
50  for (unsigned int n = 0; n < inputBatches; n++)
51  {
52  for (unsigned int h = 0; h < inputHeight; h++)
53  {
54  for (unsigned int w = 0; w < inputWidth; w++)
55  {
56  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
57  inputDecoder[index];
58  outputEncoder[index];
59  outputEncoder.Set(mult * inputDecoder.Get() + add);
60  }
61  }
62  }
63  }
64 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), GetTensorInfo(), DataLayoutIndexed::GetWidthIndex(), BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QueueDescriptor::m_Inputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().
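
For each channel c the loop precomputes mult = gamma / sqrt(var + eps) and add = beta - mult * mean, then writes mult * input + add for every element of that channel. A standalone sketch of the same arithmetic on a plain buffer (author's illustration, one channel only, not the armnn code path):

#include <cmath>
#include <cstddef>
#include <vector>

// Author's sketch: applies y = gamma * (x - mean) / sqrt(var + eps) + beta to one channel,
// written in the same mult/add form used by BatchNormImpl above.
std::vector<float> BatchNormChannel(const std::vector<float>& x,
                                    float mean, float var, float beta, float gamma, float eps)
{
    const float mult = gamma / std::sqrt(var + eps);
    const float add  = beta - mult * mean;

    std::vector<float> y(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
    {
        y[i] = mult * x[i] + add;
    }
    return y;
}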

◆ BatchToSpaceNd()

void BatchToSpaceNd ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const BatchToSpaceNdDescriptor &  params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 50 of file BatchToSpaceNd.cpp.

55 {
56  unsigned int rank = inputInfo.GetNumDimensions();
57  if (rank != 3 && rank != 4 )
58  {
59  throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
60  CHECK_LOCATION());
61  }
62 
63  DataLayoutIndexed dataLayout = params.m_DataLayout;
64  unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
65 
66  TensorShape inputShape = inputInfo.GetShape();
67  TensorShape outputShape = outputInfo.GetShape();
68 
69  const unsigned int inputBatchSize = inputShape[0];
70  const unsigned int outputBatchSize = outputShape[0];
71 
72  const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
73  : inputShape[dataLayout.GetChannelsIndex()];
74 
75  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
76  const unsigned int inputWidth = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
77  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
78  const unsigned int outputWidth = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
79 
80  const unsigned int blockHeight = params.m_BlockShape[0];
81  const unsigned int blockWidth = (rank == 3) ? 1 : params.m_BlockShape[1];
82 
83  const unsigned int cropsTop = params.m_Crops[0].first;
84  const unsigned int cropsLeft = (rank == 3) ? 0 : params.m_Crops[1].first;
85 
86  for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
87  {
88  const unsigned int outBatch = inBatch % outputBatchSize;
89  const unsigned int spatialOffset = inBatch / outputBatchSize;
90 
91  for (unsigned int inH = 0; inH < inputHeight; ++inH)
92  {
93  const unsigned int outH = inH * blockHeight + spatialOffset / blockWidth - cropsTop;
94 
95  if (outH >= outputHeight)
96  {
97  continue;
98  }
99 
100  for (unsigned int inW = 0; inW < inputWidth; ++inW)
101  {
102  const unsigned int outW = inW * blockWidth + spatialOffset % blockWidth - cropsLeft;
103 
104  if (outW >= outputWidth)
105  {
106  continue;
107  }
108 
109  for (unsigned int c = 0; c < channels; c++)
110  {
111  unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
112  unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
113 
114  outputData[outOffset];
115  inputData[inOffset];
116  outputData.Set(inputData.Get());
117  }
118  }
119  }
120  }
121 }

References BatchToSpaceNd(), CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), BatchToSpaceNdDescriptor::m_BlockShape, BatchToSpaceNdDescriptor::m_Crops, BatchToSpaceNdDescriptor::m_DataLayout, Offset(), and Encoder< IType >::Set().

Referenced by BatchToSpaceNd(), and BatchToSpaceNdLayer::BatchToSpaceNdLayer().
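
As a concrete illustration (author's example, not from the sources): an NHWC input of shape [4, 1, 1, 1] with a 2x2 block shape and zero crops is rearranged into a single [1, 2, 2, 1] output, the four batch elements becoming the 2x2 spatial grid. The descriptor for that case could be filled in as follows:

#include <armnn/Descriptors.hpp>

// Author's sketch of the descriptor for the example above: 2x2 block, no cropping, NHWC layout.
armnn::BatchToSpaceNdDescriptor MakeExampleBatchToSpaceNdDescriptor()
{
    armnn::BatchToSpaceNdDescriptor desc;
    desc.m_BlockShape = { 2, 2 };
    desc.m_Crops      = { { 0, 0 }, { 0, 0 } };
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}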

◆ BuildAddMulAddSlotLists()

void armnn::BuildAddMulAddSlotLists ( bool  handleReLu,
bool  multipleOutputs,
std::vector< SlotListType > &  inputLayersSlotLists,
std::vector< SlotListType > &  outputLayersSlotLists 
)

Definition at line 36 of file NeonBackendOptimizationUtils.hpp.

40 {
41  // Build input slot list
42  inputLayersSlotLists.push_back({0, 1}); // Add
43  inputLayersSlotLists.push_back({1}); // Mul
44  inputLayersSlotLists.push_back({1}); // Add
45  if (handleReLu)
46  {
47  inputLayersSlotLists.push_back({}); // Relu
48  }
49 
50  // Build output slot list
51  if (multipleOutputs)
52  {
53  outputLayersSlotLists.push_back({0}); // Add
54  }
55  else
56  {
57  outputLayersSlotLists.push_back({}); // Add
58  }
59  outputLayersSlotLists.push_back({}); // Mul
60  if (handleReLu)
61  {
62  outputLayersSlotLists.push_back({}); // Add
63  outputLayersSlotLists.push_back({0}); // Relu
64  }
65  else
66  {
67  outputLayersSlotLists.push_back({0}); // Add
68  }
69 }

◆ BuildAddMulAddTensorInfoLists()

bool armnn::BuildAddMulAddTensorInfoLists ( Type *  layerList[4],
unsigned int &  numInputs,
unsigned int &  numOutputs,
std::vector< TensorInfo > &  inputInfos,
std::vector< TensorInfo > &  outputInfos,
const ActivationDescriptor *&  activationDescriptor,
bool &  fuseReLu 
)

Definition at line 87 of file NeonBackendOptimizationUtils.hpp.

94 {
95  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
96  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
97  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
98 
99  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[0], BinaryOperation::Add));
100  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[1], BinaryOperation::Mul));
101  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[2], BinaryOperation::Add));
102 
103  auto is1D = [](const TensorInfo expanded)
104  {
105  TensorInfo collapsed;
106  if (CollapseLeadingUnitDimensions(expanded, collapsed))
107  {
108  return (collapsed.GetNumDimensions() == 1);
109  }
110  else
111  {
112  return (expanded.GetNumDimensions() == 1);
113  }
114  };
115 
116  // One of the 2 inputs for MUL and the Second ADD must be 1D
117  // ref: clframework/src/cpu/kernels/CpuAddMulAddKernel.cpp
118  auto& mulLayer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[1]));
119  auto& add2Layer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[2]));
120 
121  Layer& mulInput0 = mulLayer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
122  Layer& mulInput1 = mulLayer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
123  Layer& add2Input0 = add2Layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
124  Layer& add2Input1 = add2Layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
125  if (!is1D(mulInput0.GetOutputSlot(0).GetTensorInfo()) && !is1D(mulInput1.GetOutputSlot(0).GetTensorInfo()))
126  {
127  return false;
128  }
129  if (!is1D(add2Input0.GetOutputSlot(0).GetTensorInfo()) && !is1D(add2Input1.GetOutputSlot(0).GetTensorInfo()))
130  {
131  return false;
132  }
133 
134  fuseReLu = (layerList[3] != nullptr);
135  if (fuseReLu)
136  {
137  activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
138  ARMNN_THROW_INVALIDARG_IF_FALSE((activationDescriptor->m_Function == ActivationFunction::ReLu) ||
139  (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
140  }
141 
142  numInputs = 0;
143  numOutputs = 0;
144 
145  // Ensure that there are 6 input slots in the add/mul/add layers
146  // we are going to replace
147  unsigned int layerIdx = 0;
148  unsigned int inputSlotCount = 0;
149  for (layerIdx = 0; layerIdx < 3; ++layerIdx)
150  {
151  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
152  {
153  InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
154  OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
155  if (outputSlot)
156  {
157  if (layerIdx == 0)
158  {
159  // Always count the input connections of the first add
160  inputInfos.push_back(inputSlot->GetTensorInfo());
161  numInputs++;
162  }
163  else
164  {
165  // For subsequent layers, we skip connections to the previous layers in the counting
166  if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
167  {
168  TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
169  if (numInputs == 2 || numInputs == 3)
170  {
171  // Workaround the broadcast optimization to collapse shapes such as
172  // [1, 1, 1, 2] to [2] as required by backend
173  if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
174  {
175  OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
176  if (previousLayerSlot)
177  {
178  if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
179  {
180  // First update the TensorInfo in the constant owning layer
181  previousLayerSlot->SetTensorInfo(inputSlotInfo);
182  // Then update the TensorInfo in the workload for the owning layer
183  ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
184  &previousLayerSlot->GetOwningLayer());
185  layer->m_LayerOutput
186  = std::make_unique<ScopedTensorHandle>(
187  ConstTensor(inputSlotInfo,
188  layer->m_LayerOutput.get()->GetConstTensor<void>()));
189  }
190  }
191  }
192  }
193  inputInfos.push_back(inputSlotInfo);
194  numInputs++;
195  }
196  }
197  inputSlotCount++;
198  }
199  }
200  }
201 
202  // Check the input counts
203  bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
204  if (! validInputCount)
205  {
206  return false;
207  }
208 
209  const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
210  for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
211  {
212  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
213  {
214  OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
215 
216  for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
217  {
218  InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
219  if (layerIdx < (maxIdx-1))
220  {
221  if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
222  {
223  outputInfos.push_back(outputSlot->GetTensorInfo());
224  numOutputs++;
225  }
226  }
227  else if (layerList[layerIdx] != nullptr)
228  {
229  outputInfos.push_back(outputSlot->GetTensorInfo());
230  numOutputs++;
231  }
232  }
233  }
234  }
235 
236  // Check the output count
237  bool validOutputCount = (outputInfos.size() > 0);
238  if (! validOutputCount)
239  {
240  return false;
241  }
242 
243  return true;
244 }

References Add, ARMNN_THROW_INVALIDARG_IF_FALSE, BoundedReLu, CollapseLeadingUnitDimensions(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), OutputSlot::GetNumConnections(), TensorInfo::GetNumDimensions(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), OutputSlot::GetTensorInfo(), Layer::GetType(), IsSequenceLayerType(), ActivationDescriptor::m_Function, ConstantLayer::m_LayerOutput, Mul, ReLu, and OutputSlot::SetTensorInfo().

◆ CalcLevel()

int armnn::CalcLevel ( const Event *  eventPtr)

Definition at line 257 of file Profiling.cpp.

258 {
259  int level = 0;
260  while (eventPtr != nullptr)
261  {
262  eventPtr = eventPtr->GetParentEvent();
263  level++;
264  }
265  return level;
266 }

References Event::GetParentEvent().

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults(), and ProfilerImpl::PopulateParent().

◆ CalculateEdgeStrategy()

EdgeStrategy armnn::CalculateEdgeStrategy ( BackendsMap &  backends,
ITensorHandleFactory::FactoryId  srcFactoryId,
const Layer &  layer,
const Layer &  connectedLayer,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1729 of file Network.cpp.

1735 {
1736  auto toBackend = backends.find(connectedLayer.GetBackendId());
1737  if (toBackend == backends.end())
1738  {
1739  throw armnn::Exception("Backend id not found for the connected layer");
1740  }
1741 
1742  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1743 
1744  // Legacy API check for backward compatibility
1745  if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1746  {
1747  if (layer.GetBackendId() != connectedLayer.GetBackendId())
1748  {
1749  return EdgeStrategy::CopyToTarget;
1750  }
1751  else
1752  {
1753  return EdgeStrategy::DirectCompatibility;
1754  }
1755  }
1756 
1757  // TensorHandleFactory API present, so perform more sophisticated strategies.
1758  // Dst Output layers don't require copy because they use import or map/unmap
1759  if (connectedLayer.GetType() == LayerType::Output)
1760  {
1761  return EdgeStrategy::DirectCompatibility;
1762  }
1763 
1764  // Search for direct match in prefs
1765  for (auto&& pref : dstPrefs)
1766  {
1767  if (pref == srcFactoryId)
1768  {
1769  return EdgeStrategy::DirectCompatibility;
1770  }
1771  }
1772 
1773  // Search for export/import options
1774  ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1775  if (srcFactory->GetExportFlags() != 0 && importEnabled)
1776  {
1777  for (auto&& pref : dstPrefs)
1778  {
1779  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1780 
1781  // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1782  if (!dstFactory) {
1783  continue;
1784  }
1785  if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1786  {
1787  auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1788  auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1789  &connectedLayer,
1790  CapabilityClass::PaddingRequired);
1791  auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1792  auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1793  &connectedLayer,
1794  CapabilityClass::FallbackImportDisabled);
1795  // Do not require memory copy if the source and destination do not require padding.
1796  if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1797  {
1798  return EdgeStrategy::ExportToTarget;
1799  }
1800  }
1801  }
1802  }
1803 
1804  // Search for copy options via map/unmap
1805  if (srcFactory->SupportsMapUnmap())
1806  {
1807  for (auto&& pref : dstPrefs)
1808  {
1809  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1810  if (dstFactory && dstFactory->SupportsMapUnmap())
1811  {
1812  return EdgeStrategy::CopyToTarget;
1813  }
1814  }
1815  }
1816 
1817  return EdgeStrategy::Undefined;
1818 }

References CopyToTarget, DirectCompatibility, ExportToTarget, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, PaddingRequired, ITensorHandleFactory::SupportsMapUnmap(), and Undefined.

◆ CalculateGatherNdKeyIndices()

std::map< std::string, unsigned int > CalculateGatherNdKeyIndices ( TensorInfo  inputInfo0,
TensorInfo  inputInfo1 
)

Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)

Parameters
inputInfo0 - TensorInfo of the corresponding input tensor: params
inputInfo1 - TensorInfo of the corresponding input tensor: indices
Returns
- A map with names and values for N, ND, K, W, C

Definition at line 313 of file WorkloadUtils.cpp.

314 {
315  std::vector<unsigned int> paramsShape;
316  for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
317  {
318  paramsShape.push_back(inputInfo0.GetShape()[i]);
319  }
320 
321  std::vector<unsigned int> indicesShape;
322  for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
323  {
324  indicesShape.push_back(inputInfo1.GetShape()[i]);
325  }
326 
327  std::map<std::string, unsigned int> keyIndices;
328 
329  // N: number of batches
330  keyIndices["N"] = 1;
331 
332  // ND: number of dimensions that are sliced from params
333  keyIndices["ND"] = indicesShape.back();
334 
335  // W: number of indices in each batch (all but the last dimension)
336  keyIndices["W"] =
337  static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
338  std::end(indicesShape) - 1,
339  1,
340  std::multiplies<>() ));
341  // K: range of each index
342  keyIndices["K"] =
343  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
344  std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
345  1,
346  std::multiplies<>() ));
347  // C: number of channels for each index
348  keyIndices["C"] =
349  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
350  std::end(paramsShape),
351  1,
352  std::multiplies<>() ));
353 
354  return keyIndices;
355 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by ClGatherNdWorkload::ClGatherNdWorkload(), ClGatherNdWorkloadValidate(), NeonGatherNdWorkload::NeonGatherNdWorkload(), and NeonGatherNdWorkloadValidate().
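
For example (author's illustration): params of shape [5, 5, 2] and indices of shape [3, 2] give ND = 2, W = 3, K = 25 and C = 2, with N fixed at 1. A small sketch of the call (the header path is an assumption):

#include <armnn/Tensor.hpp>
#include <armnn/backends/WorkloadUtils.hpp> // declares CalculateGatherNdKeyIndices; exact path is an assumption

#include <map>
#include <string>

// Author's sketch: expected result is N = 1, ND = 2, W = 3, K = 5 * 5 = 25, C = 2.
std::map<std::string, unsigned int> GatherNdKeyIndicesExample()
{
    armnn::TensorInfo params (armnn::TensorShape({ 5, 5, 2 }), armnn::DataType::Float32);
    armnn::TensorInfo indices(armnn::TensorShape({ 3, 2 }),    armnn::DataType::Signed32);

    return armnn::CalculateGatherNdKeyIndices(params, indices);
}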

◆ CalculateSlotOption()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOption ( BackendsMap &  backends,
OutputSlot &  outputSlot,
TensorHandleFactoryRegistry &  registry,
bool  exportEnabled 
)

Definition at line 1576 of file Network.cpp.

1580 {
 1581  // First ensure the source backend can support the TensorHandle API
1582  Layer& layer = outputSlot.GetOwningLayer();
1583  auto frmBackend = backends.find(layer.GetBackendId());
1584  if (frmBackend == backends.end() ||
1585  !frmBackend->second->SupportsTensorAllocatorAPI())
1586  {
1587  return ITensorHandleFactory::LegacyFactoryId;
1588  }
1589 
1590  bool outputConnection = false;
1591  for (auto&& connection : outputSlot.GetConnections())
1592  {
1593  const Layer& connectedLayer = connection->GetOwningLayer();
1594  if (connectedLayer.GetType() == LayerType::Output)
1595  {
1596  outputConnection = true;
1597  }
1598  }
1599 
1600  IBackendInternal* srcBackend = frmBackend->second.get();
1601  auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1602 
1603  // Initialize the scores
1604  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1605  for (auto&& pref : srcPrefs)
1606  {
1607  if (exportEnabled)
1608  {
1609  ITensorHandleFactory* factory = registry.GetFactory(pref);
1610  if (outputConnection)
1611  {
1612  // Check if this is fallback case
1613  bool fallbackConnection = false;
1614  for (auto&& inputSlot : layer.GetInputSlots())
1615  {
1616  if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1617  {
1618  fallbackConnection = true;
1619  }
1620  }
1621  if (fallbackConnection)
1622  {
1623  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1624  // Cannot use factory import if fallback import is not supported.
1625  if (!factoryCap.empty())
1626  {
1627  continue;
1628  }
1629  }
1630  else if (factory->GetExportFlags() == 0)
1631  {
1632  continue;
1633  }
1634  }
1635  if (!outputConnection)
1636  {
1637  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1638  // Cannot use factory import if fallback import is not supported.
1639  if (!factoryCap.empty())
1640  {
1641  continue;
1642  }
1643  }
1644 
1645  }
1646  else
1647  {
1648  // Only consider factories that support map/unmap
1649  ITensorHandleFactory* factory = registry.GetFactory(pref);
1650  if (!factory->SupportsMapUnmap())
1651  {
1652  // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1653  continue;
1654  }
1655  }
1656 
1657 
1658  auto it = factoryScores.find(pref);
1659  if (it == factoryScores.end())
1660  {
1661  // Add new score to the table
1662  factoryScores[pref] = 0;
1663  }
1664  }
1665 
1666  // Score each handle factory based on how many times it requires copies on the slot connections
1667  for (auto&& connection : outputSlot.GetConnections())
1668  {
1669  const Layer& connectedLayer = connection->GetOwningLayer();
1670 
1671  auto toBackend = backends.find(connectedLayer.GetBackendId());
1672  if (toBackend == backends.end())
1673  {
1674  throw armnn::Exception("Backend id not found for the connected layer");
1675  }
1676 
1677  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1678  for (auto&& src : srcPrefs)
1679  {
1680  if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1681  {
1682  continue;
1683  }
1684 
1685  for (auto&& dst : dstPrefs)
1686  {
1687  if (RequiresCopy(src, dst, registry))
1688  {
 1689  // Copy required, increase the score (lower scores are preferred)
1690  factoryScores[src]++;
1691  break;
1692  }
1693  }
1694  }
1695  }
1696 
1697  // Find the lowest score
1698  int minScore = std::numeric_limits<int>::max();
1699  for (auto it : factoryScores)
1700  {
1701  minScore = std::min(minScore, it.second);
1702  }
1703 
1704  // Collect factories matching the best(lowest) score
1705  std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1706  for (auto it : factoryScores)
1707  {
1708  if (it.second == minScore)
1709  {
1710  optimalFactories.push_back(it.first);
1711  }
1712  }
1713 
1714  // For all compatible Factories matching the best score, find the preferred one for the current layer.
1715  for (auto&& srcPref : srcPrefs)
1716  {
1717  for (auto&& comp : optimalFactories)
1718  {
1719  if (comp == srcPref)
1720  {
1721  return comp;
1722  }
1723  }
1724  }
1725 
1726  return ITensorHandleFactory::LegacyFactoryId;
1727 }

References FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), OutputSlot::GetConnections(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), IBackendInternal::GetHandleFactoryPreferences(), Layer::GetInputSlots(), OutputSlot::GetOwningLayer(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, RequiresCopy(), and ITensorHandleFactory::SupportsMapUnmap().

◆ CalculateSlotOptionForInput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForInput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1474 of file Network.cpp.

1478 {
1479  Layer& layer = slot.GetOwningLayer();
1480 
1481  if (layer.GetType() != LayerType::Input)
1482  {
1483  throw armnn::Exception("layer must be of type \"Input\".");
1484  }
1485 
1486  // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1487  // doesn't matter which backend it is assigned to because they all use the same implementation, which
1488  // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1489  // select a factory with maximum compatibility with the layers connected to the InputLayer.
1490 
 1491  // First ensure the source backend can support the TensorHandle API
1492  auto frmBackend = backends.find(layer.GetBackendId());
1493  if (frmBackend == backends.end() ||
1494  !frmBackend->second->SupportsTensorAllocatorAPI())
1495  {
1496  return ITensorHandleFactory::LegacyFactoryId;
1497  }
1498 
1499  // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1500  // fewest copies.
1501  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1502  int topScore = 0;
1503  ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
1504 
1505  for (auto&& connection : slot.GetConnections())
1506  {
1507 
1508  const Layer& connectedLayer = connection->GetOwningLayer();
1509 
1510  auto toBackend = backends.find(connectedLayer.GetBackendId());
1511  if (toBackend == backends.end())
1512  {
1513  throw armnn::Exception("Backend id not found for the connected layer");
1514  }
1515 
1516  if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1517  {
1518  // The destination backend does not support the tensor allocator API, move to the next one
1519  continue;
1520  }
1521 
1522  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1523  for (auto&& dst : dstPrefs)
1524  {
1525  // Input layers use the mem copy workload or import, so the selected factory must
1526  // support either the map/unmap API or Import API
1527  ITensorHandleFactory* factory = registry.GetFactory(dst);
1528  if (importEnabled && factory->GetImportFlags() == 0)
1529  {
1530  continue;
1531  }
1532  else if (!importEnabled && !factory->SupportsMapUnmap())
1533  {
1534  continue;
1535  }
1536 
1537  auto it = factoryScores.find(dst);
1538  if (it == factoryScores.end())
1539  {
1540  // Add new score to the table
1541  factoryScores[dst] = 0;
1542  if (topChoice == ITensorHandleFactory::LegacyFactoryId)
1543  {
1544  topChoice = dst;
1545  }
1546  }
1547  else
1548  {
1549  // Increase the score
1550  factoryScores[dst]++;
1551 
1552  // Track the best option
1553  if (factoryScores[dst] > topScore)
1554  {
1555  topScore = factoryScores[dst];
1556  topChoice = dst;
1557  }
1558  }
1559  }
1560  }
1561 
1562  return topChoice;
1563 }

References Layer::GetBackendId(), OutputSlot::GetConnections(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), OutputSlot::GetOwningLayer(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, and ITensorHandleFactory::SupportsMapUnmap().

◆ CalculateSlotOptionForOutput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForOutput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1566 of file Network.cpp.

1569 {
1570  IgnoreUnused(backends, slot, registry);
1571  return ITensorHandleFactory::DeferredFactoryId;
1572 }

References ITensorHandleFactory::DeferredFactoryId, and IgnoreUnused().

◆ ChainReduceLayers()

std::vector<IConnectableLayer*> armnn::ChainReduceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ReduceDescriptor &  desc 
)

Definition at line 278 of file ArmComputeSubgraphUtils.hpp.

281 {
282  // Vector of new chained layers, used for substitution.
283  std::vector<IConnectableLayer*> layers;
284 
285  // Vector of axes so each layer is reshaped correctly.
286  std::vector<uint32_t> axes;
287  unsigned int recalulatedAxis = 0;
288 
289  for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
290  {
291  // Get TensorInfo from base layer and reduce shape using axis.
292  TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
293 
294  axes.emplace_back(desc.m_vAxis[i]);
295 
296  const TensorInfo& reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
297  axes,
298  desc.m_KeepDims);
299 
300  // Create a vector for the single axis to be assigned to the descriptor.
 301  // Update the axis so that, when keepDims is not set, each chained reduce layer reduces the correct dimension.
302  std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalulatedAxis);
303 
304  // Create a descriptor and assign single axis.
305  ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
306  newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
307 
308  // Add new layer to graph.
309  std::string layerName = "reduce_layer_" + std::to_string(i);
310 
311  Layer* replacementLayer = PolymorphicDowncast<Layer*>(
312  optimizationViews.GetINetwork()->AddReduceLayer(newReduceDescriptor,
313  layerName.c_str()));
314 
315  // Connect previous layer with new layer.
316  // The first and last layer will be connected when the subgraph is replaced.
317  if (!layers.empty())
318  {
319  layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
320  }
321 
322  // Set updated tensorInfo for new layer.
323  replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
324 
325  if (!desc.m_KeepDims)
326  {
327  recalulatedAxis++;
328  }
329 
330  layers.emplace_back(replacementLayer);
331  }
332  return layers;
333 }

References INetwork::AddReduceLayer(), ComputeReductionTensorShape(), OptimizationViews::GetINetwork(), Layer::GetInputSlot(), Layer::GetOutputSlot(), ReduceDescriptor::m_KeepDims, ReduceDescriptor::m_vAxis, and OutputSlot::SetTensorInfo().
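
As an illustration of the chaining (author's example): a Reduce over m_vAxis = {1, 2} of a [1, 3, 4, 5] tensor with m_KeepDims = false becomes two single-axis layers, reduce_layer_0 over axis 1 ([1, 3, 4, 5] -> [1, 4, 5]) and reduce_layer_1 over axis 2 - 1 = 1 ([1, 4, 5] -> [1, 5]). A descriptor for that starting case might look like this:

#include <armnn/Descriptors.hpp>

// Author's sketch: the multi-axis descriptor that ChainReduceLayers() would split into
//   reduce_layer_0 : axis {1}, output shape [1, 4, 5]
//   reduce_layer_1 : axis {1} (original axis 2 shifted down), output shape [1, 5]
armnn::ReduceDescriptor MakeExampleReduceDescriptor()
{
    armnn::ReduceDescriptor desc;
    desc.m_vAxis           = { 1, 2 };
    desc.m_KeepDims        = false;
    desc.m_ReduceOperation = armnn::ReduceOperation::Sum;
    return desc;
}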

◆ CheckFlag()

bool armnn::CheckFlag ( MemorySourceFlags  flags,
MemorySource  source 
)
inline

Definition at line 41 of file MemorySources.hpp.

42 {
43  return (static_cast<MemorySourceFlags>(source) & flags) != 0;
44 }

Referenced by LoadedNetwork::ImportInputs(), and LoadedNetwork::ImportOutputs().
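
A typical use (author's illustration) is testing whether a particular memory source is present in a combined flag set:

#include <armnn/MemorySources.hpp>

// Author's sketch: returns true because the Malloc bit is present in the combined flag set.
bool MallocIsAllowedExample()
{
    armnn::MemorySourceFlags flags = static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc) |
                                     static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::DmaBuf);
    return armnn::CheckFlag(flags, armnn::MemorySource::Malloc);
}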

◆ CheckFp16Support()

bool armnn::CheckFp16Support ( BackendsMap &  backends,
const std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 1026 of file Network.cpp.

1028 {
1029  bool hasFp16 = false;
1030  // Check if the first preferred backend has FP16 support
1031  auto firstBackend = availablePreferredBackends[0];
1032  auto backendObjPtr = backends.find(firstBackend)->second.get();
1033 
1034  auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
1035  auto backendCapabilities = backendObjPtr->GetCapabilities();
1036 
1037  if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
1038  {
1039  // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled.
1040  hasFp16 = true;
1041  ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
1042  << ", has FP16 support.";
1043  }
1044  else
1045  {
1046  ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
1047  << ", does not have FP16 support. "
1048  << "The FP16 turbo mode option will be disable. It will run using FP32.";
1049  }
1050 
1051  // Check if the rest of the available preferred backends have FP16 support
1052  for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
1053  {
1054  auto backend = availablePreferredBackends[i];
1055  backendObjPtr = backends.find(backend)->second.get();
1056  backendCapabilities = backendObjPtr->GetCapabilities();
1057  if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
1058  {
1059  ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
1060  << "It will run using FP32 when falling back to this backend.";
1061  }
1062  else
1063  {
1064  ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
1065  }
1066  }
1067 
1068  return hasFp16;
1069 }

References ARMNN_LOG, debug, HasMatchingCapability(), and warning.

Referenced by Optimize().

◆ CheckScaleSetOnQuantizedType()

bool armnn::CheckScaleSetOnQuantizedType ( Layer *  layer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 801 of file Network.cpp.

802 {
803  bool noErrors = true;
804  unsigned int numOutputs = layer->GetNumOutputSlots();
805  for (unsigned int i = 0; i < numOutputs; i++) {
806  OutputSlot& outputSlot = layer->GetOutputSlot(i);
807  TensorInfo info = outputSlot.GetTensorInfo();
808  auto quantizationDataType = info.GetDataType();
809  auto quantizationScales = info.GetQuantizationScales();
810  // For any Quantized Tensor ensure scale(s) are set
811  switch(quantizationDataType) {
812  case DataType::QAsymmU8:
813  case DataType::QSymmS16:
814  case DataType::QSymmS8:
815  case DataType::QAsymmS8:
816  if ((quantizationDataType == DataType::QAsymmU8 || quantizationDataType == DataType::QAsymmS8)
817  && info.HasPerAxisQuantization()) {
818  throw InvalidArgumentException("Per Axis Quantization is not supported in "
819  "Asymmetric Quantization Datatype.");
820  }
821  // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
822  if (!info.HasPerAxisQuantization() && quantizationDataType == DataType::QAsymmU8 &&
823  (info.GetQuantizationScale() != (1.0f / 256.0f) ||
824  info.GetQuantizationOffset() != 0) &&
825  layer->GetType() == armnn::LayerType::Softmax) {
826  std::stringstream ss;
827  ss << "Quantization parameters for Softmax layer (Scale: " <<
828  info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
829  ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
830  ARMNN_LOG(warning) << ss.str();
831  info.SetQuantizationScale((1.0f / 256.0f));
832  info.SetQuantizationOffset(0);
833  outputSlot.SetTensorInfo(info);
834  ReportError(ss.str(), errMessages);
835  }
836  break;
837  default:
838  break;
839  }
840  }
841  return noErrors;
842 }

References ARMNN_LOG, Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Layer::GetType(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, ReportError(), OutputSlot::SetTensorInfo(), Softmax, and warning.

Referenced by AssignBackendsIConnectable().
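For illustration, a sketch of an output TensorInfo that satisfies the Softmax rule enforced above (shape and layer wiring omitted; the values are those required by the check):

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

armnn::TensorInfo MakeSoftmaxOutputInfo()
{
    armnn::TensorInfo info(armnn::TensorShape({1, 10}), armnn::DataType::QAsymmU8);
    info.SetQuantizationScale(1.0f / 256.0f); // the only scale accepted for a QAsymmU8 Softmax output
    info.SetQuantizationOffset(0);            // the only offset accepted
    return info;
}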

◆ CheckSupportRule()

bool armnn::CheckSupportRule ( F  rule,
Optional< std::string & >  reasonIfUnsupported,
const char *  reason 
)

Definition at line 37 of file LayerSupportRules.hpp.

38 {
39  bool supported = rule();
40  if (!supported && reason)
41  {
42  reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
43  }
44  return supported;
45 }

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by RefLayerSupport::IsActivationSupported(), RefLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchMatMulSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsBroadcastToSupported(), RefLayerSupport::IsCastSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConstantSupported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDebugSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDequantizeSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsDivisionSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), RefLayerSupport::IsFillSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsFullyConnectedSupported(), RefLayerSupport::IsGatherNdSupported(), RefLayerSupport::IsGatherSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), RefLayerSupport::IsLayerSupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsMaximumSupported(), RefLayerSupport::IsMeanSupported(), RefLayerSupport::IsMemCopySupported(), RefLayerSupport::IsMinimumSupported(), RefLayerSupport::IsMultiplicationSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsPreluSupported(), RefLayerSupport::IsQuantizeSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsReverseV2Supported(), RefLayerSupport::IsScatterNdSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsSubtractionSupported(), RefLayerSupport::IsTileSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), and RefLayerSupport::IsUnidirectionalSequenceLstmSupported().
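CheckSupportRule() is an internal backend helper: any callable returning bool can act as the rule. A minimal sketch, assuming a lambda in place of the rule structs defined in LayerSupportRules.hpp (TypeAnyOf, ShapesAreSameRank, and so on), which is how the reference backend actually uses it:

#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <string>

bool IsFourDimensional(const armnn::TensorInfo& input, armnn::Optional<std::string&> reasonIfUnsupported)
{
    bool supported = true;
    // Each failed rule appends its reason to the optional string, one reason per line.
    supported &= armnn::CheckSupportRule([&] { return input.GetNumDimensions() == 4; },
                                         reasonIfUnsupported,
                                         "Example rule: input must be a 4D tensor.");
    return supported;
}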

◆ ClAbsWorkloadValidate()

arm_compute::Status ClAbsWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file ClAbsWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
25 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().
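A usage sketch (shapes are illustrative; ClAbsWorkload.hpp is an internal GpuAcc backend header): the validate functions in this family are queried by the layer-support checks before a workload is created, and report failures through the returned arm_compute::Status.

#include <armnn/Tensor.hpp>

bool CanRunAbsOnGpuAcc()
{
    armnn::TensorInfo inputInfo(armnn::TensorShape({1, 8, 8, 16}), armnn::DataType::Float32);
    armnn::TensorInfo outputInfo(armnn::TensorShape({1, 8, 8, 16}), armnn::DataType::Float32);

    arm_compute::Status status = armnn::ClAbsWorkloadValidate(inputInfo, outputInfo);
    return status.error_code() == arm_compute::ErrorCode::OK; // error_description() explains a failure
}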

◆ ClActivationWorkloadValidate()

arm_compute::Status ClActivationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ActivationDescriptor &  descriptor 
)

Definition at line 17 of file ClActivationWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
26 
27  return arm_compute::CLActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }

Referenced by ClLayerSupport::IsActivationSupported().

◆ ClAdditionValidate()

arm_compute::Status ClAdditionValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 45 of file ClAdditionWorkload.cpp.

49 {
50  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
51  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
52  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
53 
54  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
55  activationDescriptor);
56 
57  const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
58  &aclInput1Info,
59  &aclOutputInfo,
60  g_AclConvertPolicy,
61  activationInfo);
62 
63  return aclStatus;
64 }

Referenced by ClLayerSupport::IsAdditionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClArgMinMaxWorkloadValidate()

arm_compute::Status ClArgMinMaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ArgMinMaxDescriptor &  descriptor 
)

Definition at line 31 of file ClArgMinMaxWorkload.cpp.

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }

Referenced by ClLayerSupport::IsArgMinMaxSupported().

◆ ClBackendId()

constexpr const char* armnn::ClBackendId ( )
constexpr

Definition at line 10 of file ClBackendId.hpp.

10 { return "GpuAcc"; }

Referenced by ClBackend::GetIdStatic().

◆ ClBatchMatMulValidate()

arm_compute::Status ClBatchMatMulValidate ( const TensorInfo &  inputInfoX,
const TensorInfo &  inputInfoY,
const TensorInfo &  outputInfo,
const BatchMatMulDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 24 of file ClBatchMatMulWorkload.cpp.

29 {
30  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
31  {
32  throw Exception("Support for adjoint not implemented.");
33  }
34  if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
35  {
36  throw Exception("Only MatMul over the last 2 dimensions is supported");
37  }
38 
39  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
40  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
41  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
42 
43  // The GEMM kernel dispatch handles dynamic inputs differently to static ones, so this flag needs to be set
44  aclInputInfoX.set_are_values_constant(false);
45  aclInputInfoY.set_are_values_constant(false);
46 
47  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
48  activationDescriptor);
49 
50  arm_compute::MatMulInfo matMulInfo;
51  matMulInfo.adj_lhs(descriptor.m_TransposeX);
52  matMulInfo.adj_rhs(descriptor.m_TransposeY);
53 
54  return arm_compute::CLMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, activationInfo);
55 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, and NCHW.

Referenced by ClLayerSupport::IsBatchMatMulSupported().
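An illustrative descriptor for C = X * transpose(Y), which maps onto MatMulInfo::adj_rhs above (the shapes below are example values, not requirements):

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

void BatchMatMulSketch()
{
    armnn::BatchMatMulDescriptor descriptor;
    descriptor.m_TransposeY = true; // transpose the second operand
    // m_AdjointX/m_AdjointY and non-NCHW data layouts are rejected by this validate function.

    armnn::TensorInfo x(armnn::TensorShape({2, 3, 4}), armnn::DataType::Float32);
    armnn::TensorInfo y(armnn::TensorShape({2, 5, 4}), armnn::DataType::Float32);
    armnn::TensorInfo output(armnn::TensorShape({2, 3, 5}), armnn::DataType::Float32);
    // x, y and output would be passed to ClBatchMatMulValidate() together with the descriptor.
}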

◆ ClBatchNormalizationValidate()

arm_compute::Status ClBatchNormalizationValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  mean,
const TensorInfo &  var,
const TensorInfo &  beta,
const TensorInfo &  gamma,
const BatchNormalizationDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClBatchNormalizationFloatWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInputInfo =
29  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
30  const arm_compute::TensorInfo aclOutputInfo =
31  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclMeanInfo =
33  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclVarInfo =
35  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
36  const arm_compute::TensorInfo aclBetaInfo =
37  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
38  const arm_compute::TensorInfo aclGammaInfo =
39  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
40 
41  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42  activationDescriptor);
43 
44  return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
45  &aclOutputInfo,
46  &aclMeanInfo,
47  &aclVarInfo,
48  &aclBetaInfo,
49  &aclGammaInfo,
50  descriptor.m_Eps,
51  activationInfo);
52 }

Referenced by ClLayerSupport::IsBatchNormalizationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClBatchToSpaceNdWorkloadValidate()

arm_compute::Status ClBatchToSpaceNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const BatchToSpaceNdDescriptor &  descriptor 
)

Definition at line 17 of file ClBatchToSpaceNdWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  arm_compute::Status statusBatchToSpace = arm_compute::Status(arm_compute::ErrorCode::OK);
25  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
26  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
27 
28  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
29  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
30 
31  // When a spatial dimension is missing (rank=3) set W to 1
32  const unsigned int rank = input.GetNumDimensions();
33  if (rank == 3)
34  {
35  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
36  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
37 
38  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
39  {
40  // In ACL dimensions are right to left: C, W, H, N
41  aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
42  aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
43  }
44  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
45  {
46  // In ACL dimensions are right to left: W, H, C, N
47  aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
48  aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
49  }
50  else
51  {
52  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
53  }
54 
55  statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
56  statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
57  }
58 
59  // ArmNN blockShape is [H, W] but ACL asks for W, H
60  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
61  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
62 
63  const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
64 
65  statusBatchToSpace = arm_compute::CLBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
66  blockWidth,
67  blockHeight,
68  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
69  cropInfo);
70 
71  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
72  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
73  statusBatchToSpace.error_code() == arm_compute::ErrorCode::OK)
74  {
75  return arm_compute::Status(arm_compute::ErrorCode::OK,
76  "All BatchToSpace layers validate status OK.");
77  }
78  else
79  {
80  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
81  "BatchToSpace layer validate status failed."
82  + statusBatchToSpace.error_description()
83  + statusReshapeInput.error_description()
84  + statusReshapeOutput.error_description());
85  }
86 }

Referenced by ClLayerSupport::IsBatchToSpaceNdSupported().
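An illustrative 4D NHWC descriptor; note the ArmNN block shape is ordered [H, W] while ACL receives the width first, as handled above:

#include <armnn/Descriptors.hpp>

armnn::BatchToSpaceNdDescriptor MakeBatchToSpaceDescriptor()
{
    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = {2, 2};           // [blockH, blockW]
    descriptor.m_Crops      = {{0, 0}, {0, 0}}; // (begin, end) crop per spatial dimension
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;
    return descriptor;
}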

◆ ClCastValidate()

arm_compute::Status ClCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClCastWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::CLCast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
26 }

Referenced by ClLayerSupport::IsCastSupported().

◆ ClChannelShuffleValidate()

arm_compute::Status ClChannelShuffleValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ChannelShuffleDescriptor &  descriptor 
)

Definition at line 20 of file ClChannelShuffleWorkload.cpp.

23 {
24  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  // In Arm NN and in NNAPI, the channel shuffle implementation is data layout agnostic and takes the axis as a parameter.
28  // The channel shuffle implementation for Neon depends on the data layout and does not take an axis parameter;
29  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
30  arm_compute::DataLayout aclDataLayout;
31  if (input.GetNumDimensions() == 4)
32  {
33  switch (descriptor.m_Axis)
34  {
35  case 1:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
37  break;
38  case 3:
39  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
40  break;
41  default:
42  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
43  }
44  aclInputInfo.set_data_layout(aclDataLayout);
45  aclOutputInfo.set_data_layout(aclDataLayout);
46  return arm_compute::CLChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
47  }
48  else
49  {
50  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
51  }
52 }

Referenced by ClLayerSupport::IsChannelShuffleSupported().
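An illustrative descriptor for shuffling the channels of an NHWC tensor; as the comments above describe, axis 3 selects the NHWC path and axis 1 the NCHW path:

#include <armnn/Descriptors.hpp>

armnn::ChannelShuffleDescriptor MakeChannelShuffleDescriptor()
{
    armnn::ChannelShuffleDescriptor descriptor;
    descriptor.m_NumGroups = 4; // the channel count must be divisible by the number of groups
    descriptor.m_Axis      = 3; // channel dimension of a 4D NHWC tensor
    return descriptor;
}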

◆ ClComparisonWorkloadValidate()

arm_compute::Status ClComparisonWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ComparisonDescriptor &  descriptor 
)

Definition at line 24 of file ClComparisonWorkload.cpp.

28 {
29  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
30  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
32 
33  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
34 
35  const arm_compute::Status aclStatus = arm_compute::CLComparison::validate(&aclInput0Info,
36  &aclInput1Info,
37  &aclOutputInfo,
38  comparisonOperation);
39  return aclStatus;
40 }

Referenced by ClLayerSupport::IsComparisonSupported().

◆ ClConcatWorkloadValidate()

arm_compute::Status ClConcatWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo &  output,
const OriginsDescriptor &  descriptor 
)

Definition at line 27 of file ClConcatWorkload.cpp.

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
38  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  size_t aclAxis = CalcAxis(descriptor);
45  return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
46 }

Referenced by ClLayerSupport::IsConcatSupported().

◆ ClConstantWorkloadValidate()

arm_compute::Status ClConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 18 of file ClConstantWorkload.cpp.

19 {
20  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  std::array<arm_compute::DataType,8> supportedTypes = {
23  arm_compute::DataType::F16,
24  arm_compute::DataType::F32,
25  arm_compute::DataType::QASYMM8,
26  arm_compute::DataType::QASYMM8_SIGNED,
27  arm_compute::DataType::QSYMM16,
28  arm_compute::DataType::QSYMM8,
29  arm_compute::DataType::QSYMM8_PER_CHANNEL,
30  arm_compute::DataType::S32
31  };
32  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
33 
34  if (it != end(supportedTypes))
35  {
36  return arm_compute::Status{};
37  }
38  else
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
41  }
42 }

Referenced by ClLayerSupport::IsConstantSupported().

◆ ClContextBufferHasIdentifier()

bool armnn::ClContextBufferHasIdentifier ( const void *  buf)
inline

Definition at line 152 of file ClContextSchema_generated.h.

152  {
153  return flatbuffers::BufferHasIdentifier(
154  buf, ClContextIdentifier());
155 }

References ClContextIdentifier().

◆ ClContextExtension()

const char* armnn::ClContextExtension ( )
inline

Definition at line 167 of file ClContextSchema_generated.h.

167  {
168  return "armnn";
169 }

◆ ClContextIdentifier()

const char* armnn::ClContextIdentifier ( )
inline

◆ ClConvertFp16ToFp32WorkloadValidate()

arm_compute::Status ClConvertFp16ToFp32WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp16ToFp32Workload.cpp.

45 {
46  if (input.GetDataType() != DataType::Float16)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
49  }
50  if (output.GetDataType() != DataType::Float32)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp16ToFp32Supported().

◆ ClConvertFp32ToFp16WorkloadValidate()

arm_compute::Status ClConvertFp32ToFp16WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp32ToFp16Workload.cpp.

45 {
46  if (input.GetDataType() != DataType::Float32)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
49  }
50  if (output.GetDataType() != DataType::Float16)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp32ToFp16Supported().

◆ ClConvolution2dWorkloadValidate()

arm_compute::Status ClConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution2dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
35 
36  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
37  descriptor.m_DilationY);
38 
39  arm_compute::TensorInfo aclBiasesInfo;
40  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
41 
42  if (descriptor.m_BiasEnabled)
43  {
44  if (!biases.has_value())
45  {
46  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
47  "ArmNN ClConvolution2dWorkload has empty bias value."};
48  }
49  // There's currently a problem with non const bias, so we'll explicitly block it here.
50  if (!biases.value().IsConstant())
51  {
52  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
53  "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
54  }
55  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
56  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
57  optionalAclBiasesInfo = &aclBiasesInfo;
58  }
59 
60  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
61 
62  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
63  activationDescriptor);
64 
65  return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
66  &aclWeightsInfo,
67  optionalAclBiasesInfo,
68  &aclOutputInfo,
69  layerInfo,
70  arm_compute::WeightsInfo(),
71  aclDilationInfo,
72  activationInfo,
73  isFastMathEnabled);
74 }

Referenced by ClLayerSupport::IsConvolution2dSupported(), and ClBackend::OptimizeSubgraphView().
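A sketch of the descriptor and constant bias this call expects (shapes and strides are illustrative assumptions); the bias is wrapped in an Optional and must be constant, matching the explicit check in the code above:

#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>

void Convolution2dValidateSketch()
{
    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = descriptor.m_PadRight = descriptor.m_PadTop = descriptor.m_PadBottom = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    // The bias must be constant: the validate function above rejects a non-constant bias.
    armnn::TensorInfo biasInfo(armnn::TensorShape({16}), armnn::DataType::Float32,
                               0.0f, 0, /*isConstant=*/true);
    armnn::Optional<armnn::TensorInfo> biases(biasInfo);
}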

◆ ClConvolution3dWorkloadValidate()

arm_compute::Status ClConvolution3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution3dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution3dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
33 
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  if (!biases.has_value())
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
41  "ArmNN ClConvolution3dWorkload has empty bias value."};
42  }
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
48 
49  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
50  isFastMathEnabled,
51  activationDescriptor);
52 
53  return arm_compute::CLConv3D::validate(&aclInputInfo,
54  &aclWeightsInfo,
55  optionalAclBiasesInfo,
56  &aclOutputInfo,
57  aclConv3DInfo);
58 }

Referenced by ClLayerSupport::IsConvolution3dSupported().

◆ ClDepthToSpaceWorkloadValidate()

arm_compute::Status ClDepthToSpaceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthToSpaceDescriptor &  descriptor 
)

Definition at line 22 of file ClDepthToSpaceWorkload.cpp.

25 {
26  DataLayout dataLayout = descriptor.m_DataLayout;
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
28 
29  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
30 
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
32 
33  const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo,
34  &aclOutputInfo,
35  blockSize);
36  return aclStatus;
37 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsDepthToSpaceSupported().

◆ ClDepthwiseConvolutionWorkloadValidate()

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 26 of file ClDepthwiseConvolutionWorkload.cpp.

32 {
33  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
35 
36  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
37  //
38  // ACL format for weights for depthwise is:
39  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
40  // - [1, C, H, W] for [N, C, H, W] input/output layout
41  //
42  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
43  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
44  // so we do the permute here for the TensorInfo weights.
45  unsigned int aclDepthMultiplier;
46  TensorInfo weightsPermuted;
47  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
48 
49  // Convert the weights into the compute library format
50  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
51  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
52 
53  arm_compute::TensorInfo aclBiasesInfo;
54  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
55  if (descriptor.m_BiasEnabled)
56  {
57  if (!biases.has_value())
58  {
59  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
60  "ArmNN ClDepthwiseConv2dWorkload has empty bias value."};
61  }
62  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
63  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
64  optionalAclBiasesInfo = &aclBiasesInfo;
65  }
66 
67  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
68  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
69  descriptor.m_DilationX,
70  descriptor.m_DilationY);
71 
72  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
73  activationDescriptor);
74 
75  return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
76  &aclWeightsInfo,
77  optionalAclBiasesInfo,
78  &aclOutputInfo,
79  aclPadStrideInfo,
80  aclDepthMultiplier,
81  activationInfo,
82  aclDilationInfo);
83 
84 }

Referenced by ClLayerSupport::IsDepthwiseConvolutionSupported(), ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and ClBackend::OptimizeSubgraphView().
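For reference, a sketch of the [1, H, W, C] weight TensorInfo described in the comment above, where C = inputChannels * depthMultiplier (the channel count and kernel size are illustrative):

#include <armnn/Tensor.hpp>

armnn::TensorInfo MakeDepthwiseWeightsInfo()
{
    constexpr unsigned int inputChannels   = 32;
    constexpr unsigned int depthMultiplier = 1;

    // ArmNN stores depthwise weights as [1, H, W, inputChannels * depthMultiplier],
    // independently of the input/output data layout.
    return armnn::TensorInfo(armnn::TensorShape({1, 3, 3, inputChannels * depthMultiplier}),
                             armnn::DataType::Float32,
                             0.0f, 0, /*isConstant=*/true);
}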

◆ ClDequantizeWorkloadValidate()

arm_compute::Status ClDequantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 22 of file ClDequantizeWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo);
28 }

Referenced by ClLayerSupport::IsDequantizeSupported().

◆ ClDivisionWorkloadValidate()

arm_compute::Status ClDivisionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClDivisionWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
31 }

Referenced by ClLayerSupport::IsDivisionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClElementwiseBinaryValidate()

arm_compute::Status ClElementwiseBinaryValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ElementwiseBinaryDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 64 of file ClElementwiseBinaryWorkload.cpp.

69 {
70  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
71  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
72  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
73 
74  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
75  activationDescriptor);
76 
77  switch (descriptor.m_Operation)
78  {
79  case BinaryOperation::Power:
80  return arm_compute::CLElementwisePower::validate(&aclInput0Info,
81  &aclInput1Info,
82  &aclOutputInfo,
83  activationInfo);
84  case BinaryOperation::SqDiff:
85  return arm_compute::CLElementwiseSquaredDiff::validate(&aclInput0Info,
86  &aclInput1Info,
87  &aclOutputInfo,
88  activationInfo);
89  default:
90  throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
91  }
92 }

Referenced by ClLayerSupport::IsLayerSupported().

◆ ClExpWorkloadValidate()

arm_compute::Status ClExpWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClExpWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClFloorWorkloadValidate()

arm_compute::Status ClFloorWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 14 of file ClFloorFloatWorkload.cpp.

16 {
17  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
18  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
19 
20  return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
21 }

Referenced by ClLayerSupport::IsFloorSupported().

◆ ClFullyConnectedWorkloadValidate()

arm_compute::Status ClFullyConnectedWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClFullyConnectedWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
27  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
28  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
29  aclWeights.set_are_values_constant(weights.IsConstant());
30 
31  arm_compute::TensorInfo aclBiases;
32  arm_compute::TensorInfo* optionalAclBiases = nullptr;
33  if (descriptor.m_BiasEnabled)
34  {
35  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
36  biases.has_value(),
37  "ClFullyConnectedWorkload: Bias was enabled in the descriptor but no value was supplied.");
38  aclBiases = BuildArmComputeTensorInfo(biases.value());
39  aclBiases.set_are_values_constant(biases.value().IsConstant());
40  optionalAclBiases = &aclBiases;
41  }
42 
43  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
44  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
45 
46  return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
47  &aclWeights,
48  optionalAclBiases,
49  &aclOutput,
50  fullyConnectedLayerInfo);
51 }

Referenced by ClLayerSupport::IsFullyConnectedSupported(), and ClBackend::OptimizeSubgraphView().
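An illustrative descriptor for a fully connected layer with bias; the validate function above requires a bias TensorInfo whenever m_BiasEnabled is set:

#include <armnn/Descriptors.hpp>

armnn::FullyConnectedDescriptor MakeFullyConnectedDescriptor()
{
    armnn::FullyConnectedDescriptor descriptor;
    descriptor.m_BiasEnabled           = true; // a bias TensorInfo must then be supplied
    descriptor.m_TransposeWeightMatrix = true; // set when the weights are stored transposed
    return descriptor;
}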

◆ ClGatherNdWorkloadValidate()

arm_compute::Status ClGatherNdWorkloadValidate ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo 
)

Validate Mul

Validate ReduceSum

Validate Gather

Validate Reshape

Return OK if all the layers are valid

Definition at line 16 of file ClGatherNdWorkload.cpp.

19 {
20  // Calculate ND, K, W, C.
21  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
22 
23  /// Validate Mul
24  // Indices with shape { W, ND }
25  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
26  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
27  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
28 
29  // Flattened coefficients with shape { ND }
30  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
31  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
32  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
33 
34  // Output of Mul with shape { W, ND }
35  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
36 
37  auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
38  &aclFlattenedCoeffInfo,
39  &aclOutputMulInfo,
40  1.0f,
41  arm_compute::ConvertPolicy::WRAP,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  arm_compute::ActivationLayerInfo());
44 
45  /// Validate ReduceSum
46  // Flattened indices with shape { W }
47  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
48  flattenedIndices_Info.SetShape({ keyIndices["W"] });
49  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
50 
51  const std::vector<unsigned int> armnnReduceAxes(1, 1);
52  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
53  indices_W_ND_Info.GetNumDimensions(),
54  armnnReduceAxes);
55 
56  auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
57  &aclFlattenedIndicesInfo,
58  static_cast<unsigned int>(coords[0]),
59  arm_compute::ReductionOperation::SUM,
60  false);
61 
62  /// Validate Gather
63  // Params with shape { K, C }
64  armnn::TensorInfo params_K_C_Info = paramsInfo;
65  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
66  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
67 
68  // Output of gather with shape { W, C }
69  armnn::TensorInfo outputGather_Info = outputInfo;
70  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
71  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
72 
73  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
74  auto statusGather =
75  arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
76 
77  /// Validate Reshape
78  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
79 
80  auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
81 
82  /// Return OK if all the layers are valid
83  auto okCode = arm_compute::ErrorCode::OK;
84  if (statusMul.error_code() == okCode &&
85  statusReduceSum.error_code() == okCode &&
86  statusGather.error_code() == okCode &&
87  statusReshape.error_code() == okCode)
88  {
89  return arm_compute::Status(arm_compute::ErrorCode::OK,
90  "All GatherND layers validate status OK.");
91  }
92  else
93  {
94  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
95  "GatherND layer validate status failed.");
96  }
97 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by ClLayerSupport::IsGatherNdSupported().

◆ ClGatherWorkloadValidate()

arm_compute::Status ClGatherWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  indices,
const TensorInfo &  output,
const GatherDescriptor &  descriptor 
)

Definition at line 15 of file ClGatherWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25 
26  return arm_compute::CLGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
27 }

Referenced by ClLayerSupport::IsGatherSupported().

◆ ClImportTensorHandleFactoryId()

constexpr const char* armnn::ClImportTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file ClImportTensorHandleFactory.hpp.

15 {
16  return "Arm/Cl/ImportTensorHandleFactory";
17 }

Referenced by ClImportTensorHandleFactory::GetIdStatic().

◆ ClInstanceNormalizationWorkloadValidate()

arm_compute::Status ClInstanceNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const InstanceNormalizationDescriptor &  descriptor 
)

Definition at line 18 of file ClInstanceNormalizationWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo,
26  &aclOutputInfo,
27  descriptor.m_Gamma,
28  descriptor.m_Beta,
29  descriptor.m_Eps);
30 }

Referenced by ClLayerSupport::IsInstanceNormalizationSupported().

◆ ClL2NormalizationWorkloadValidate()

arm_compute::Status ClL2NormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const L2NormalizationDescriptor &  descriptor 
)

Definition at line 17 of file ClL2NormalizationFloatWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
25 
26  return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
27 }

Referenced by ClLayerSupport::IsL2NormalizationSupported().

◆ ClLogicalAndWorkloadValidate()

arm_compute::Status ClLogicalAndWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalAndWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalAnd::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogicalNotWorkloadValidate()

arm_compute::Status ClLogicalNotWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalNotWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::CLLogicalNot::validate(&aclInputInfo,
27  &aclOutputInfo);
28  return aclStatus;
29 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLogicalOrWorkloadValidate()

arm_compute::Status ClLogicalOrWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalOrWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalOr::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogSoftmaxWorkloadValidate()

arm_compute::Status ClLogSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const LogSoftmaxDescriptor &  descriptor 
)

Definition at line 17 of file ClLogSoftmaxWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

Referenced by ClLayerSupport::IsLogSoftmaxSupported().

◆ ClLogWorkloadValidate()

arm_compute::Status ClLogWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClLogWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLLogLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLstmFloatWorkloadValidate()

arm_compute::Status ClLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  scratchBuffer,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const LstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 244 of file ClLstmFloatWorkload.cpp.

249 {
250  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
251 
252  // The inputs and the outputs
253  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
254  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
255  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
256  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
257  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
258  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
259  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
260 
261  // Basic parameters
262  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
263  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
264  const arm_compute::TensorInfo aclInputToCellWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
266  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
268  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
270  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
272  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
274  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
275  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
276  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
277 
278  arm_compute::TensorInfo aclInputToInputWeightsInfo;
279  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
280  arm_compute::TensorInfo aclCellToInputWeightsInfo;
281  arm_compute::TensorInfo aclInputGateBiasInfo;
282  arm_compute::TensorInfo aclProjectionWeightsInfo;
283  arm_compute::TensorInfo aclProjectionBiasInfo;
284  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
285  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
286  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
287  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
288  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
289  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
290 
291  if (!descriptor.m_CifgEnabled)
292  {
293  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
294  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
295 
296  if (paramsInfo.m_CellToInputWeights != nullptr)
297  {
298  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
299  }
300  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
301  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
302  paramsInfo.m_CellToInputWeights != nullptr ?
303  &aclCellToInputWeightsInfo: nullptr,
304  &aclInputGateBiasInfo);
305  }
306 
307  if (descriptor.m_ProjectionEnabled)
308  {
309  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
310 
311  if (paramsInfo.m_ProjectionBias != nullptr)
312  {
313  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
314  }
315  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
316  paramsInfo.m_ProjectionBias != nullptr ?
317  &aclProjectionBiasInfo: nullptr);
318  }
319 
320  if (descriptor.m_PeepholeEnabled)
321  {
322  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
323  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
324  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
325  }
326 
327  float cell_threshold = descriptor.m_ClippingThresCell;
328  float projection_threshold = descriptor.m_ClippingThresProj;
329 
330  // To prepare the ActivationLayerInfo object, five situations need to be considered
331  arm_compute::ActivationLayerInfo activationLayerInfo =
332  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
333 
334  if (descriptor.m_LayerNormEnabled)
335  {
336  if (!descriptor.m_CifgEnabled)
337  {
338  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
339  }
340 
341  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
342 
343  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
344 
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
348  nullptr : &aclInputLayerNormWeightsInfo,
349  &aclForgetLayerNormWeightsInfo,
350  &aclCellLayerNormWeightsInfo,
351  &aclOutputLayerNormWeightsInfo);
352  }
353 
354  return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
355  &aclInputToCellWeightsInfo,
356  &aclInputToOutputWeightsInfo,
357  &aclRecurrentToForgetWeightsInfo,
358  &aclRecurrentToCellWeightsInfo,
359  &aclRecurrentToOutputWeightsInfo,
360  &aclForgetGateBiasInfo,
361  &aclCellBiasInfo,
362  &aclOutputGateBiasInfo,
363  &aclOutputStateInInfo, &aclCellStateInInfo,
364  &aclScratchBufferInfo, &aclOutputStateOutInfo,
365  &aclCellStateOutInfo, &aclOutputInfo,
366  lstm_params_info, activationLayerInfo,
367  cell_threshold, projection_threshold);
368 }

Referenced by ClLayerSupport::IsLstmSupported().

◆ ClMaximumWorkloadValidate()

arm_compute::Status ClMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMaximumWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMaximumSupported().

◆ ClMeanValidate()

arm_compute::Status ClMeanValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 17 of file ClMeanWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
25  input.GetNumDimensions(),
26  descriptor.m_Axis);
27 
28  return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
29 }

Referenced by ClLayerSupport::IsMeanSupported().
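An illustrative descriptor reducing over the spatial dimensions of an NHWC tensor; the ArmNN axis list is translated to ACL reduction coordinates as shown above:

#include <armnn/Descriptors.hpp>

armnn::MeanDescriptor MakeMeanDescriptor()
{
    armnn::MeanDescriptor descriptor;
    descriptor.m_Axis     = {1, 2}; // H and W of an NHWC tensor
    descriptor.m_KeepDims = true;   // keep the reduced dimensions with size 1
    return descriptor;
}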

◆ ClMinimumWorkloadValidate()

arm_compute::Status ClMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMinimumWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMinimumSupported().

◆ ClMultiplicationWorkloadValidate()

arm_compute::Status ClMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClMultiplicationWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
28  arm_compute::ConvertPolicy::SATURATE :
29  arm_compute::ConvertPolicy::WRAP;
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
35  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
36  // ignored for F32 tensors.
37  return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
38  &aclInput2,
39  &aclOutput,
40  1.0f,
41  convertPolicy,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  activationInfo);
44 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsMultiplicationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClNegWorkloadValidate()

arm_compute::Status ClNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClNegWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClNormalizationWorkloadValidate()

arm_compute::Status ClNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 19 of file ClNormalizationFloatWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
27 
28  return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
29 }

Referenced by ClLayerSupport::IsNormalizationSupported().

◆ ClPadValidate()

arm_compute::Status ClPadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 62 of file ClPadWorkload.cpp.

65 {
66  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
67  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
68 
69  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
70 
71  std::reverse_copy(std::begin(descriptor.m_PadList),
72  std::end(descriptor.m_PadList),
73  std::begin(reversed_PadList));
74 
75  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
76 
77  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
78  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
79  const arm_compute::Status aclStatus =
80  arm_compute::CLPadLayer::validate(&aclInputInfo,
81  &aclOutputInfo,
82  padList,
83  pixelValue,
84  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
85 
86  return aclStatus;
87 }

Referenced by ClLayerSupport::IsPadSupported().
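An illustrative descriptor padding each spatial edge of an NHWC tensor by one element; as noted in the code above, the (before, after) pairs are reversed before they are handed to ACL:

#include <armnn/Descriptors.hpp>

armnn::PadDescriptor MakePadDescriptor()
{
    armnn::PadDescriptor descriptor;
    descriptor.m_PadList     = {{0, 0}, {1, 1}, {1, 1}, {0, 0}}; // (before, after) per ArmNN dimension
    descriptor.m_PadValue    = 0.0f;
    descriptor.m_PaddingMode = armnn::PaddingMode::Constant;
    return descriptor;
}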

◆ ClPermuteWorkloadValidate()

arm_compute::Status ClPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 17 of file ClPermuteWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
27 }

Referenced by ClLayerSupport::IsPermuteSupported().
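An illustrative permute descriptor (the NCHW-to-NHWC mapping is an example): in ArmNN, m_DimMappings[i] gives the destination index of source dimension i, and the vector is converted by BuildArmComputePermutationVector() before being passed to CLPermute.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

armnn::PermuteDescriptor MakeNchwToNhwcPermute()
{
    // Source dims N, C, H, W go to destination positions 0, 3, 1, 2 respectively (NCHW -> NHWC).
    armnn::PermutationVector mappings({0, 3, 1, 2});
    return armnn::PermuteDescriptor(mappings);
}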

◆ ClPooling2dWorkloadValidate()

arm_compute::Status ClPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 18 of file ClPooling2dWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
26 
27  return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28 }

Referenced by ClLayerSupport::IsPooling2dSupported().

◆ ClPooling3dWorkloadValidate()

arm_compute::Status ClPooling3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Pooling3dDescriptor descriptor 
)

Definition at line 18 of file ClPooling3dWorkload.cpp.

21  {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
26 
27  return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28  }

Referenced by ClLayerSupport::IsPooling3dSupported().

◆ ClPreluWorkloadValidate()

arm_compute::Status ClPreluWorkloadValidate ( const TensorInfo input,
const TensorInfo alpha,
const TensorInfo output 
)

Definition at line 16 of file ClPreluWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLPReluLayer::validate(&aclInput,
25  &aclAlpha,
26  &aclOutput);
27 }

Referenced by ClLayerSupport::IsPreluSupported().

◆ ClQLstmWorkloadValidate()

arm_compute::Status ClQLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo cellStateIn,
const TensorInfo outputStateIn,
const TensorInfo cellStateOut,
const TensorInfo outputStateOut,
const TensorInfo output,
const QLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 247 of file ClQLstmWorkload.cpp.

255 {
256  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
257 
258  // Input/Output tensor info
259  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
260  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
261  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
262 
263  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
264  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
265  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
266 
267  // Mandatory tensor info
268  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
270  const arm_compute::TensorInfo aclInputToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
272  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
274  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
276  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
278  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
280  const arm_compute::TensorInfo aclForgetGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
282  const arm_compute::TensorInfo aclCellBiasInfo
283  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
284  const arm_compute::TensorInfo aclOutputGateBiasInfo
285  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
286 
287  // Optional tensor info
288  arm_compute::TensorInfo aclInputToInputWeightsInfo;
289  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
290 
291  arm_compute::TensorInfo aclCellToInputWeightsInfo;
292  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
293  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
294 
295  arm_compute::TensorInfo aclInputGateBiasInfo;
296 
297  arm_compute::TensorInfo aclProjectionWeightsInfo;
298  arm_compute::TensorInfo aclProjectionBiasInfo;
299 
300  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
301  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
302  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
303  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
304 
305  // Create tensor info for optional params if they are enabled
306  if (descriptor.m_PeepholeEnabled)
307  {
308  if (!descriptor.m_CifgEnabled)
309  {
310  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
311  }
312 
313  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
314  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
315 
316  // Set peephole params info
317  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
318  &aclCellToOutputWeightsInfo);
319  }
320 
321  if (descriptor.m_ProjectionEnabled)
322  {
323  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
324 
325  if (paramsInfo.m_ProjectionBias != nullptr)
326  {
327  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
328  }
329 
330  // Set projection params info
331  aclParamsInfo.set_projection_params(
332  &aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
334  }
335 
336  if (descriptor.m_LayerNormEnabled)
337  {
338  if (!descriptor.m_CifgEnabled)
339  {
340  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
341  }
342 
343  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
344  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  // Set layer norm params info
348  aclParamsInfo.set_layer_normalization_params(
349  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
350  &aclForgetLayerNormWeightsInfo,
351  &aclCellLayerNormWeightsInfo,
352  &aclOutputLayerNormWeightsInfo);
353  }
354 
355  if (!descriptor.m_CifgEnabled)
356  {
357  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
358  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
359  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
360 
361  // Set CIFG params info
362  aclParamsInfo.set_cifg_params(
363  &aclInputToInputWeightsInfo,
364  &aclRecurrentToInputWeightsInfo,
365  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
366  &aclInputGateBiasInfo);
367  }
368 
369  // Set scalar descriptor params
370  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
371  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
372  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
373  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
374  descriptor.m_ForgetIntermediateScale,
375  descriptor.m_CellIntermediateScale,
376  descriptor.m_OutputIntermediateScale);
377 
378  // QLSTM CL validate
379  return arm_compute::CLQLSTMLayer::validate(&aclInputInfo,
380  &aclInputToForgetWeightsInfo,
381  &aclInputToCellWeightsInfo,
382  &aclInputToOutputWeightsInfo,
383  &aclRecurrentToForgetWeightsInfo,
384  &aclRecurrentToCellWeightsInfo,
385  &aclRecurrentToOutputWeightsInfo,
386  &aclForgetGateBiasInfo,
387  &aclCellBiasInfo,
388  &aclOutputGateBiasInfo,
389  &aclCellStateInInfo,
390  &aclOutputStateInInfo,
391  &aclCellStateOutInfo,
392  &aclOutputStateOutInfo,
393  &aclOutputInfo,
394  aclParamsInfo);
395 }

Referenced by ClLayerSupport::IsQLstmSupported().

◆ ClQuantizedLstmWorkloadValidate()

arm_compute::Status ClQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 18 of file ClQuantizedLstmWorkload.cpp.

22 {
23  // Inputs
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn);
26  const arm_compute::TensorInfo aclPreviousOutputInInfo = BuildArmComputeTensorInfo(previousOutputIn);
27 
28  // Outputs
29  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  // Basic parameters
33  const arm_compute::TensorInfo aclInputToInputWeightsInfo
34  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
35  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
36  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
37  const arm_compute::TensorInfo aclInputToCellWeightsInfo
38  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
39  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
40  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
41  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
42  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
43  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
44  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
45  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
46  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
47  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
48  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
49  const arm_compute::TensorInfo aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
50  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
51  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
52  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
53 
54  return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo,
55  &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo,
56  &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo,
57  &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo,
58  &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo,
59  &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo,
60  &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo,
61  &aclCellStateOutInfo, &aclOutputInfo);
62 }

Referenced by ClLayerSupport::IsQuantizedLstmSupported().

◆ ClQuantizeWorkloadValidate()

arm_compute::Status ClQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file ClQuantizeWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::CLQuantizationLayer::validate(&aclInputInfo,
29  &aclOutputInfo);
30 }

Referenced by ClLayerSupport::IsQuantizeSupported().

◆ ClReduceWorkloadValidate()

arm_compute::Status ClReduceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor 
)

Definition at line 18 of file ClReduceWorkload.cpp.

21 {
22  if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
23  {
24  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
28  input.GetNumDimensions(),
29  descriptor.m_vAxis);
30 
31  return arm_compute::CLReductionOperation::validate(&aclInputInfo,
32  &aclOutputInfo,
33  static_cast<unsigned int>(coords[0]),
34  ConvertReductionOperationToAcl(descriptor),
35  descriptor.m_KeepDims);
36  }
37  else
38  {
39  // Validate layer if there are multiple axes.
40  arm_compute::Status status;
41  IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
42  return status;
43  }
44 }

References ReduceDescriptor::m_vAxis.

Referenced by ClLayerSupport::IsReduceSupported().

◆ ClReshapeWorkloadValidate()

arm_compute::Status ClReshapeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 15 of file ClReshapeWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
22 }

Referenced by ClLayerSupport::IsReshapeSupported().

◆ ClResizeWorkloadValidate()

arm_compute::Status ClResizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor 
)

Definition at line 22 of file ClResizeWorkload.cpp.

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  return arm_compute::CLScale::validate(&aclInputInfo,
40  &aclOutputInfo,
41  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
42  arm_compute::BorderMode::REPLICATE,
43  arm_compute::PixelValue(0.f),
44  samplingPolicy,
45  true,
46  descriptor.m_AlignCorners));
47 }

Referenced by ClLayerSupport::IsResizeSupported().

◆ ClReverseV2WorkloadValidate()

arm_compute::Status ClReverseV2WorkloadValidate ( const TensorInfo input,
const TensorInfo axis,
const TensorInfo output 
)

Definition at line 16 of file ClReverseV2Workload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
25 }

Referenced by ClLayerSupport::IsReverseV2Supported().

◆ ClRsqrtWorkloadValidate()

arm_compute::Status ClRsqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClRsqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClScatterNdWorkloadValidate()

arm_compute::Status ClScatterNdWorkloadValidate ( const TensorInfo inputInfo,
const TensorInfo indicesInfo,
const TensorInfo updatesInfo,
const TensorInfo outputInfo,
const ScatterNdDescriptor descriptor 
)

Definition at line 20 of file ClScatterNdWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(inputInfo);
27  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indicesInfo);
28  const arm_compute::TensorInfo aclUpdatesInfo = BuildArmComputeTensorInfo(updatesInfo);
29  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
30 
31  arm_compute::ScatterInfo scatterInfo = BuildArmComputeScatterInfo(descriptor);
32 
33  return arm_compute::CLScatter::validate(descriptor.m_InputEnabled ? &aclInputInfo : nullptr,
34  &aclUpdatesInfo,
35  &aclIndicesInfo,
36  &aclOutputInfo,
37  scatterInfo);
38 }

Referenced by ClLayerSupport::IsScatterNdSupported().

◆ ClSinWorkloadValidate()

arm_compute::Status ClSinWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClSinWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLSinLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClSliceWorkloadValidate()

arm_compute::Status ClSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SliceDescriptor descriptor 
)

Definition at line 18 of file ClSliceWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates starts;
26  arm_compute::Coordinates ends;
27 
28  std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size);
29 
30  return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
31 }

Referenced by ClLayerSupport::IsSliceSupported().

◆ ClSoftmaxWorkloadValidate()

arm_compute::Status ClSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor 
)

Definition at line 17 of file ClSoftmaxWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

Referenced by ClLayerSupport::IsSoftmaxSupported().

◆ ClSpaceToBatchNdWorkloadValidate()

arm_compute::Status ClSpaceToBatchNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor 
)

Definition at line 16 of file ClSpaceToBatchNdWorkload.cpp.

19 {
20  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
21  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
22 
23  arm_compute::Status statusSpaceToBatch = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
25  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
26 
27  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
28  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
29 
30  // When a spatial dimension is missing (rank=3), set W to 1
31  const unsigned int rank = input.GetNumDimensions();
32  if (rank == 3)
33  {
34  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
35  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
36 
37  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
38  {
39  // In ACL dimensions are right to left: C, W, H, N
40  aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
41  aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
42  }
43  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
44  {
45  // In ACL dimensions are right to left: W, H, C, N
46  aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
47  aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
48  }
49  else
50  {
51  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
52  }
53 
54  statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
55  statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
56  }
57 
58  // ArmNN blockShape is [H, W]; ACL asks for W, H
59  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
60  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
61 
62  unsigned int padLeft = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
63  unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
64  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(padLeft,
65  descriptor.m_PadList[0].first);
66  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(padRight,
67  descriptor.m_PadList[0].second);
68 
69  const arm_compute::Status aclStatus = arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
70  blockWidth,
71  blockHeight,
72  paddingLeftTop,
73  paddingRightBottom,
74  &aclOutputInfo);
75 
76  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
77  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
78  statusSpaceToBatch.error_code() == arm_compute::ErrorCode::OK)
79  {
80  return arm_compute::Status(arm_compute::ErrorCode::OK,
81  "All SpaceToBatch layers validate status OK.");
82  }
83  else
84  {
85  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
86  "SpaceToBatch layer validate status failed."
87  + statusSpaceToBatch.error_description()
88  + statusReshapeInput.error_description()
89  + statusReshapeOutput.error_description());
90  }
91 }

Referenced by ClLayerSupport::IsSpaceToBatchNdSupported().
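
The block shape and padding are re-ordered for ACL: ArmNN stores blockShape as [H, W] and the pad list as {top, bottom} then {left, right}, whereas CLSpaceToBatchLayer takes blockWidth, blockHeight and Size2D(left, top) / Size2D(right, bottom); for rank-3 inputs the missing W dimension gets block 1 and zero padding. A small sketch of just that re-ordering, with hypothetical values:

    #include <cstdint>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        const unsigned int rank = 4;                                   // a rank-3 input would force W to 1
        std::vector<uint32_t> blockShape = {2, 3};                     // ArmNN order: [H, W]
        std::vector<std::pair<uint32_t, uint32_t>> padList = {{1, 1},  // {padTop, padBottom}
                                                              {0, 2}}; // {padLeft, padRight}

        int32_t blockHeight = static_cast<int32_t>(blockShape[0]);
        int32_t blockWidth  = (rank == 3) ? 1 : static_cast<int32_t>(blockShape[1]);

        uint32_t padLeft  = (rank == 3) ? 0 : padList[1].first;
        uint32_t padRight = (rank == 3) ? 0 : padList[1].second;

        // ACL takes (width, height) pairs: Size2D(left, top) and Size2D(right, bottom).
        std::printf("blockWidth=%d blockHeight=%d leftTop=(%u,%u) rightBottom=(%u,%u)\n",
                    blockWidth, blockHeight, padLeft, padList[0].first, padRight, padList[0].second);
    }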

◆ ClSpaceToDepthWorkloadValidate()

arm_compute::Status ClSpaceToDepthWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor 
)

Definition at line 54 of file ClSpaceToDepthWorkload.cpp.

57 {
58  DataLayout dataLayout = descriptor.m_DataLayout;
59  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
60 
61  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
62 
63  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
64 
65  const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo,
66  &aclOutputInfo,
67  blockSize);
68  return aclStatus;
69 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsSpaceToDepthSupported().

◆ ClSplitterWorkloadValidate()

arm_compute::Status ClSplitterWorkloadValidate ( const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 32 of file ClSplitterWorkload.cpp.

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
37 
38  size_t numOutputs = outputs.size();
39 
40  std::vector<arm_compute::TensorInfo> aclOutputs;
41  aclOutputs.reserve(numOutputs);
42 
43  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
44  aclOutputPtr.reserve(numOutputs);
45 
46  for (size_t i = 0u; i < outputs.size(); ++i)
47  {
48  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
49  aclOutputPtr.emplace_back(&aclOutputs.back());
50  }
51 
52  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
53  return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
54 }

Referenced by ClLayerSupport::IsSplitterSupported().

◆ ClSqrtWorkloadValidate()

arm_compute::Status ClSqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file ClSqrtWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  ActivationDescriptor descriptor;
25  descriptor.m_Function = ActivationFunction::Sqrt;
26  const arm_compute::ActivationLayerInfo activationLayerInfo =
27  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
28 
29  return arm_compute::CLActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
30 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClStackWorkloadValidate()

arm_compute::Status ClStackWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo output,
const StackDescriptor descriptor 
)

Definition at line 29 of file ClStackWorkload.cpp.

32 {
33  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
34  arm_compute::TensorInfo aclInputInfo;
35  for (const TensorInfo* input : inputs)
36  {
37  aclInputInfo = BuildArmComputeTensorInfo(*input);
38  aclInputPtrs.emplace_back(&aclInputInfo);
39  }
40  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
41 
42  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
43 
44  return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
45 }

Referenced by ClLayerSupport::IsStackSupported().

◆ ClStridedSliceWorkloadValidate()

arm_compute::Status ClStridedSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor 
)

Definition at line 27 of file ClStridedSliceWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33 
34  arm_compute::Coordinates starts;
35  arm_compute::Coordinates ends;
36  arm_compute::Coordinates strides;
37 
38  std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride);
39 
40  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
41  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
42  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
43  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
44 
45  return arm_compute::CLStridedSlice::validate(&aclInputInfo,
46  &aclOutputInfo,
47  starts,
48  ends,
49  strides,
50  begin_mask,
51  end_mask,
52  shrink_axis_mask);
53 }

Referenced by ClLayerSupport::IsStridedSliceSupported().

◆ ClSubtractionValidate()

arm_compute::Status ClSubtractionValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 46 of file ClSubtractionWorkload.cpp.

50 {
51  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
52  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
53  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
54 
55  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
56  activationDescriptor);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
59  &aclInput1Info,
60  &aclOutputInfo,
61  g_AclConvertPolicy,
62  activationInfo);
63 
64  return aclStatus;
65 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsSubtractionSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClTensorHandleFactoryId()

constexpr const char* armnn::ClTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file ClTensorHandleFactory.hpp.

16 {
17  return "Arm/Cl/TensorHandleFactory";
18 }

Referenced by ClTensorHandleFactory::GetIdStatic().

◆ ClTileWorkloadValidate()

arm_compute::Status ClTileWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TileDescriptor descriptor 
)

Definition at line 16 of file ClTileWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
22 
23  std::vector<uint32_t> aclMultiples = descriptor.m_Multiples;
24  std::reverse(aclMultiples.begin(),aclMultiples.end());
25 
26  return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples);
27 }

Referenced by ClLayerSupport::IsTileSupported().

◆ ClTransposeConvolution2dWorkloadValidate()

arm_compute::Status ClTransposeConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file ClTransposeConvolution2dWorkload.cpp.

31 {
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35 
36  arm_compute::TensorInfo aclBiasesInfo;
37  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
38 
39  if (descriptor.m_BiasEnabled)
40  {
41  if (!biases.has_value())
42  {
43  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
44  "ArmNN ClTransposeConv2dWorkload has empty bias value."};
45  }
46  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
47  optionalAclBiasesInfo = &aclBiasesInfo;
48  }
49 
50  arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor);
51 
52  return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo,
53  &aclWeightsInfo,
54  optionalAclBiasesInfo,
55  &aclOutputInfo,
56  padStrideInfo);
57 }

Referenced by ClLayerSupport::IsTransposeConvolution2dSupported().

◆ ClTransposeWorkloadValidate()

arm_compute::Status ClTransposeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeDescriptor descriptor 
)

Definition at line 17 of file ClTransposeWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
27 }

Referenced by ClLayerSupport::IsTransposeSupported().

◆ ClUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 508 of file ClUnidirectionalSequenceLstmFloatWorkload.cpp.

516 {
517  TensorShape inputLayerShape = input.GetShape();
518  TensorShape outputLayerShape = output.GetShape();
519 
520  if (inputLayerShape.GetNumDimensions() != 3)
521  {
522  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
523  "Unidirectional Sequence LSTM layer validate status failed.");
524  }
525 
526  unsigned int maxTime = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
527  unsigned int batchSize = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
528  unsigned int inputSize = inputLayerShape[2];
529  unsigned int outputSize = outputLayerShape[2];
530 
531  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
532  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
533 
534  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "Permute1 status");
536  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Split status");
538  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "LSTM status");
540  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
541  "Concat status");
542  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
543  "Permute2 status");
544 
545  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
546  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
547 
548  //
549  // Permute validate
550  //
551  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
552  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
553  if (!descriptor.m_TimeMajor)
554  {
555  statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
556  &aclPermuteOutInfo,
557  arm_compute::PermutationVector(0U, 2U, 1U));
558  }
559 
560  //
561  // Split and Concat Tensors validate
562  //
563  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
564  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
565  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
566  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
567  splitterOutputsTensorInfos.reserve(maxTime);
568  concatInputsTensorInfos.reserve(maxTime);
569  for (unsigned int i = 0; i < maxTime; ++i)
570  {
571  arm_compute::TensorInfo splitter_out;
572  arm_compute::TensorInfo concat_in;
573 
574  auto splitterTensorInfo = TensorInfo(input);
575  auto concatTensorInfo = TensorInfo(output);
576  splitterTensorInfo.SetShape({batchSize, inputSize});
577  concatTensorInfo.SetShape({batchSize, outputSize});
578 
579  arm_compute::TensorInfo aclSplitterTensorInfo
580  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
581  arm_compute::TensorInfo aclConcatTensorInfo
582  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
583 
584  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
585  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
586  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
587  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
588  }
589 
590  //
591  // Split validate
592  //
593  unsigned int numberDimensions = 3;
594  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
595  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
596 
597  if (maxTime != 1) // ACL split does not work with only one element to split.
598  {
599  if (!descriptor.m_TimeMajor)
600  {
601  statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
602  splitterOutputsTensorInfosPtr,
603  aclAxisSplit);
604  }
605  else
606  {
607  statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
608  }
609  }
610 
611  //
612  // LSTM validate
613  //
614 
615  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
616 
617  unsigned int numUnits = cellStateIn.GetShape()[1];
618  unsigned int scratchBufferFactor = 4;
619 
620  if (descriptor.m_CifgEnabled)
621  {
622  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
623  scratchBufferFactor = 3;
624  }
625 
626  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
627 
628  // The inputs and outputs
629  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
630  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
631  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
632  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
633  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
634 
635  // Basic parameters
636  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
637  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
638  const arm_compute::TensorInfo aclInputToCellWeightsInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
640  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
641  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
642  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
643  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
644  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
645  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
646  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
647  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
648  const arm_compute::TensorInfo aclForgetGateBiasInfo
649  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
650  const arm_compute::TensorInfo aclCellBiasInfo
651  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
652  const arm_compute::TensorInfo aclOutputGateBiasInfo
653  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
654 
655  arm_compute::TensorInfo aclInputToInputWeightsInfo;
656  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
657  arm_compute::TensorInfo aclCellToInputWeightsInfo;
658  arm_compute::TensorInfo aclInputGateBiasInfo;
659  arm_compute::TensorInfo aclProjectionWeightsInfo;
660  arm_compute::TensorInfo aclProjectionBiasInfo;
661  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
662  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
663 
664  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
665  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
666  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
667  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
668 
669 
670  if (!descriptor.m_CifgEnabled)
671  {
672  if (descriptor.m_PeepholeEnabled)
673  {
674  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
675  }
676  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
677  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
678  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
679 
680  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
681  &aclRecurrentToInputWeightsInfo,
682  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
683  &aclInputGateBiasInfo);
684  }
685 
686  if (descriptor.m_ProjectionEnabled)
687  {
688  if (paramsInfo.m_ProjectionBias != nullptr)
689  {
690  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
691  }
692  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
693 
694  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
695  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
696  }
697 
698  if (descriptor.m_PeepholeEnabled)
699  {
700  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
701  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
702 
703  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
704  }
705 
706  if (descriptor.m_LayerNormEnabled)
707  {
708  if (!descriptor.m_CifgEnabled)
709  {
710  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
711  }
712  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
713  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
714  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
715 
716  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
717  &aclInputLayerNormWeightsInfo,
718  &aclForgetLayerNormWeightsInfo,
719  &aclCellLayerNormWeightsInfo,
720  &aclOutputLayerNormWeightsInfo);
721  }
722 
723  // Needs to be set to a negative threshold to be compatible with ACL
724  float cell_threshold = descriptor.m_ClippingThresCell;
725  float projection_threshold = descriptor.m_ClippingThresProj;
726 
727  arm_compute::ActivationLayerInfo activationLayerInfo =
728  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
729 
730  for (unsigned int i = 0; i != maxTime; ++i)
731  {
732 
733  // Set LSTM input and output ITensors depending on:
734  // input format (timeMajor) & number of LSTM batches (maxTime).
735  arm_compute::ITensorInfo* outputLSTM;
736  arm_compute::ITensorInfo* inputLSTM;
737  // If there is only one LSTM time major batch, we will not concat OR permute.
738  // Set input of LSTM to be first input ITensor.
739  // Set output of LSTM to be final output ITensor.
740  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
741  if (maxTime == 1 && descriptor.m_TimeMajor)
742  {
743  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
744  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
745  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
746  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
747  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
748  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
749  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
750  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
751  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
752  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
753  }
754  // If there is only one LSTM batch major batch, we will not concat, only permute.
755  // Set input of LSTM to be output of initial permute.
756  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
757  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
758  else if (maxTime == 1 && !descriptor.m_TimeMajor)
759  {
760  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
761  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
762  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
763  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
764  inputLSTM = &aclPermuteOutInfo;
765  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
766  }
767  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
768  else
769  {
770  inputLSTM = splitterOutputsTensorInfosPtr[i];
771  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
772  }
773 
774  statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
775  &aclInputToForgetWeightsInfo,
776  &aclInputToCellWeightsInfo,
777  &aclInputToOutputWeightsInfo,
778  &aclRecurrentToForgetWeightsInfo,
779  &aclRecurrentToCellWeightsInfo,
780  &aclRecurrentToOutputWeightsInfo,
781  &aclForgetGateBiasInfo,
782  &aclCellBiasInfo,
783  &aclOutputGateBiasInfo,
784  &aclOutputStateInInfo,
785  &aclCellStateInInfo,
786  &aclScratchBufferInfo,
787  &aclOutputStateOutInfo,
788  &aclCellStateOutInfo,
789  outputLSTM,
790  lstm_params_info,
791  activationLayerInfo,
792  cell_threshold,
793  projection_threshold);
794 
795  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
796  {
797  break;
798  }
799  }
800 
801  //
802  // Concat validate
803  //
804 
805  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
806  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
807  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
808  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
809 
810  TensorInfo concatOuputTensorInfo = TensorInfo(output);
811  concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
812  arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
813 
814  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
815  {
816  for (unsigned int i = 0; i < maxTime; ++i)
817  {
818  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
819  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
820  }
821 
822  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
823  if (!descriptor.m_TimeMajor)
824  {
825  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
826  &aclConcatOuputTensorInfo,
827  aclAxisConcat);
828  }
829  else
830  {
831  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
832  &aclOutputInfo,
833  aclAxisConcat);
834  }
835  }
836  // If only one LSTM batch, we do not concat and/or permute.
837  // Must ensure final output info is expanded to correct batch major dimensions.
838  else
839  {
840  if (!descriptor.m_TimeMajor)
841  {
842  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
843  BuildArmComputeTensorShape(shapeExpandBatchMajor));
844  }
845  else
846  {
847  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
848  BuildArmComputeTensorShape(shapeExpandTimeMajor));
849  }
850  }
851  //
852  // Permute validate
853  //
854  if (!descriptor.m_TimeMajor)
855  {
856  // Output now time major. Permute output back to batch major.
857  if (maxTime != 1)
858  {
859  statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
860  &aclOutputInfo,
861  arm_compute::PermutationVector(0U, 2U, 1U));
862  }
863  else
864  {
865  statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
866  &aclOutputInfo,
867  arm_compute::PermutationVector(0U, 2U, 1U));
868  }
869  }
870 
871  auto okCode = arm_compute::ErrorCode::OK;
872  if (statusPermute1.error_code() == okCode &&
873  statusSplit.error_code() == okCode &&
874  statusLSTM .error_code() == okCode &&
875  statusConcat.error_code() == okCode &&
876  statusPermute2.error_code() == okCode)
877  {
878  return arm_compute::Status(arm_compute::ErrorCode::OK,
879  "All Unidirectional Sequence LSTM layer validate status OK.");
880  }
881  else
882  {
883  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
884  "Unidirectional Sequence LSTM layer validate status failed.");
885  }
886 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by ClLayerSupport::IsUnidirectionalSequenceLstmSupported().
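
Two of the derived quantities above are worth calling out: maxTime and batchSize swap places depending on m_TimeMajor, and the scratch buffer is {batchSize, numUnits * 4}, dropping to a factor of 3 when CIFG is enabled. A short sketch of that arithmetic with hypothetical shapes:

    #include <cstdio>

    int main()
    {
        // Hypothetical batch-major input [batch, time, inputSize] and output [batch, time, outputSize].
        const unsigned int inputShape[3]  = {2, 5, 8};
        const unsigned int outputShape[3] = {2, 5, 16};
        const bool timeMajor   = false;
        const bool cifgEnabled = false;
        const unsigned int numUnits = 16; // taken from the cellStateIn shape [batch, numUnits]

        unsigned int maxTime    = timeMajor ? inputShape[0] : inputShape[1]; // 5
        unsigned int batchSize  = timeMajor ? inputShape[1] : inputShape[0]; // 2
        unsigned int outputSize = outputShape[2];                            // 16

        // Scratch buffer is {batchSize, numUnits * 4}, or * 3 when CIFG is enabled.
        unsigned int scratchBufferFactor = cifgEnabled ? 3 : 4;
        std::printf("maxTime=%u batchSize=%u outputSize=%u scratchBuffer={%u, %u}\n",
                    maxTime, batchSize, outputSize, batchSize, numUnits * scratchBufferFactor);
    }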

◆ CollapseLeadingUnitDimensions()

bool armnn::CollapseLeadingUnitDimensions ( const TensorInfo in,
TensorInfo out 
)
inline

Definition at line 14 of file NeonBackendOptimizationUtils.hpp.

15 {
16  unsigned int numDimensions = in.GetNumDimensions();
17  for (unsigned int i = 0; i < (numDimensions-1); ++i)
18  {
19  if (in.GetShape()[i] != 1)
20  {
21  return false;
22  }
23  }
24 
25  unsigned int w = in.GetShape()[numDimensions-1];
26  out = in;
27  out.SetShape({w});
28 
29  return true;
30 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), and TensorInfo::SetShape().

Referenced by BuildAddMulAddTensorInfoLists().

◆ Combine() [1/2]

MemorySourceFlags armnn::Combine ( Arg  source,
Args...  rest 
)

Definition at line 36 of file MemorySources.hpp.

37 {
38  return static_cast<MemorySourceFlags>(source) | Combine(rest...);
39 }

References Combine().

◆ Combine() [2/2]

MemorySourceFlags armnn::Combine ( Arg  sourceA,
Arg  sourceB 
)

Definition at line 30 of file MemorySources.hpp.

31 {
32  return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
33 }

Referenced by Combine().
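
The two Combine() overloads fold any number of MemorySource values into a single MemorySourceFlags bitmask: the binary overload ORs two sources, and the variadic overload peels one off and recurses. A minimal sketch of the same folding pattern, using a stand-in enum rather than the real armnn::MemorySource (names and values here are illustrative only):

    #include <cstdint>
    #include <iostream>

    // Stand-in for armnn::MemorySource / MemorySourceFlags (illustrative only).
    enum class MemorySource : uint32_t { Malloc = 1, DmaBuf = 2, DmaBufProtected = 4 };
    using MemorySourceFlags = uint32_t;

    // Binary base case: OR the two flag values together.
    MemorySourceFlags Combine(MemorySource a, MemorySource b)
    {
        return static_cast<MemorySourceFlags>(a) | static_cast<MemorySourceFlags>(b);
    }

    // Variadic case: peel off one source and recurse on the rest.
    template <typename... Args>
    MemorySourceFlags Combine(MemorySource source, Args... rest)
    {
        return static_cast<MemorySourceFlags>(source) | Combine(rest...);
    }

    int main()
    {
        MemorySourceFlags flags = Combine(MemorySource::Malloc,
                                          MemorySource::DmaBuf,
                                          MemorySource::DmaBufProtected);
        std::cout << flags << "\n"; // prints 7 (1 | 2 | 4)
    }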

◆ ComputeAclAxis()

int armnn::ComputeAclAxis ( const int &  armnnAxis,
const armnn::TensorInfo tensor 
)
inline

Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)

Definition at line 246 of file ArmComputeUtils.hpp.

247 {
248  int rank = static_cast<int>(tensor.GetNumDimensions());
249 
250  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(rank != 0, "The number of dimensions in this tensor cannot be zero.");
251  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(armnnAxis < rank, "Incompatible value of armnnAxis.");
252  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE((-1 * rank) <= armnnAxis, "Incompatible value of armnnAxis.");
253 
254  int sign = (armnnAxis < 0) ? -1 : 1;
255  int aclAxis = sign * rank - 1 - armnnAxis;
256 
257  return aclAxis;
258 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, and TensorInfo::GetNumDimensions().

Referenced by ClGatherWorkload::ClGatherWorkload(), ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(), ClSoftmaxWorkload::ClSoftmaxWorkload(), NeonGatherWorkload::NeonGatherWorkload(), NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(), and NeonSoftmaxWorkload::NeonSoftmaxWorkload().
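
Since ACL counts dimensions right to left while ArmNN counts left to right, the conversion reduces to aclAxis = sign * rank - 1 - armnnAxis, with sign = -1 for negative axes. A self-contained sketch of that arithmetic (it mirrors the documented logic rather than calling the ArmNN header):

    #include <cassert>
    #include <stdexcept>

    // Mirrors the documented conversion: ArmNN axis (left to right, may be negative)
    // to ACL axis (right to left). 'rank' is the tensor's number of dimensions.
    int ComputeAclAxisSketch(int armnnAxis, int rank)
    {
        if (rank == 0 || armnnAxis >= rank || armnnAxis < -rank)
        {
            throw std::invalid_argument("Incompatible value of armnnAxis.");
        }
        int sign = (armnnAxis < 0) ? -1 : 1;
        return sign * rank - 1 - armnnAxis;
    }

    int main()
    {
        // Rank-4 NCHW tensor: ArmNN axis 1 (C) maps to ACL axis 2.
        assert(ComputeAclAxisSketch(1, 4) == 2);
        // Negative axis: ArmNN axis -1 (last dimension) maps to ACL axis -4,
        // i.e. the same dimension addressed from the other end.
        assert(ComputeAclAxisSketch(-1, 4) == -4);
    }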

◆ ComputeConv3DInfo() [1/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dDescriptor  descriptor,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)
inline

Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.

Definition at line 261 of file ArmComputeUtils.hpp.

264 {
265  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
266  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
267  descriptor.m_PadTop, descriptor.m_PadBottom,
268  descriptor.m_PadFront, descriptor.m_PadBack};
269  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
270 
271  const arm_compute::ActivationLayerInfo activationInfo =
272  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
273  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
274 
275  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
276 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), Convolution3dDescriptor::m_DilationX, Convolution3dDescriptor::m_DilationY, Convolution3dDescriptor::m_DilationZ, Convolution3dDescriptor::m_PadBack, Convolution3dDescriptor::m_PadBottom, Convolution3dDescriptor::m_PadFront, Convolution3dDescriptor::m_PadLeft, Convolution3dDescriptor::m_PadRight, Convolution3dDescriptor::m_PadTop, Convolution3dDescriptor::m_StrideX, Convolution3dDescriptor::m_StrideY, and Convolution3dDescriptor::m_StrideZ.

◆ ComputeConv3DInfo() [2/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dQueueDescriptor  queueDescriptor,
bool  isFastMathEnabled 
)
inline

Definition at line 278 of file ArmComputeUtils.hpp.

280 {
281  auto descriptor = queueDescriptor.m_Parameters;
282  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
283  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
284  descriptor.m_PadTop, descriptor.m_PadBottom,
285  descriptor.m_PadFront, descriptor.m_PadBack};
286  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
287 
288  const arm_compute::ActivationLayerInfo activationInfo =
289  ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
290  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
291 
292  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
293 }

References ConvertAdditionalInfoToAclActivationLayerInfo(), QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Convolution3dDescriptor::m_StrideX.

◆ ComputeReductionTensorShape()

const TensorInfo armnn::ComputeReductionTensorShape ( const armnn::TensorInfo input,
const std::vector< uint32_t > &  vAxis,
const bool  keepDims 
)
inline

Function to compute the output tensor shape based on the axes and if keepDims is set.

Definition at line 320 of file ArmComputeUtils.hpp.

323 {
324  auto reducedTensorInfo = input;
325  unsigned int rank = reducedTensorInfo.GetNumDimensions();
326  unsigned int outputRank = 0;
327  // Calculate output dimension
328  if (keepDims)
329  {
330  outputRank = rank;
331  }
332  else if (vAxis.empty())
333  {
334  outputRank = 1;
335  }
336  else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
337  {
338  throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
339  }
340  else
341  {
342  outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
343  if (outputRank == 0)
344  {
345  outputRank = 1;
346  }
347  }
348  std::vector<unsigned int> dimSizes(outputRank, 1);
349  if (!vAxis.empty())
350  {
351  // Skip the dimension that has been reduced unless keepDims is true.
352  unsigned int outputIndex = 0;
353  for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
354  {
355  if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
356  {
357  dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
358  ++outputIndex;
359  }
360  else if (keepDims)
361  {
362  dimSizes[outputIndex] = 1;
363  ++outputIndex;
364  }
365  }
366  }
367  const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
368  reducedTensorInfo.SetShape(inferredShape);
369  return reducedTensorInfo;
370 }

References TensorInfo::GetNumDimensions().

Referenced by ChainReduceLayers().
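
For example, reducing a [2, 3, 4] tensor over axis 1 yields [2, 4] with keepDims false and [2, 1, 4] with keepDims true, while an empty axis list collapses everything to a single element. A standalone sketch of the shape arithmetic, on plain vectors rather than armnn::TensorInfo:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Mirrors the documented shape logic: drop (or keep as 1) every reduced axis.
    std::vector<unsigned int> ReducedShapeSketch(const std::vector<unsigned int>& shape,
                                                 const std::vector<uint32_t>& axes,
                                                 bool keepDims)
    {
        if (axes.empty())
        {
            // Reduce over everything: the result collapses to a single element.
            return keepDims ? std::vector<unsigned int>(shape.size(), 1)
                            : std::vector<unsigned int>{1};
        }
        std::vector<unsigned int> out;
        for (unsigned int i = 0; i < shape.size(); ++i)
        {
            const bool reduced = std::find(axes.begin(), axes.end(), i) != axes.end();
            if (!reduced)
            {
                out.push_back(shape[i]);
            }
            else if (keepDims)
            {
                out.push_back(1);
            }
        }
        if (out.empty())
        {
            out.push_back(1); // rank never drops below 1
        }
        return out;
    }

    int main()
    {
        auto a = ReducedShapeSketch({2, 3, 4}, {1}, false); // {2, 4}
        auto b = ReducedShapeSketch({2, 3, 4}, {1}, true);  // {2, 1, 4}
        std::printf("%zu %zu\n", a.size(), b.size());
    }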

◆ ComputeSoftmaxAclAxis()

T armnn::ComputeSoftmaxAclAxis ( const SoftmaxDescriptor softmaxDesc,
const armnn::TensorInfo tensor 
)
inline

Definition at line 227 of file ArmComputeUtils.hpp.

228 {
229  // Detect the Android default value of -1 and return the ACL default value of 0.
230  if (softmaxDesc.m_Axis == -1)
231  {
232  return 0;
233  }
234 
235  unsigned int dim = tensor.GetNumDimensions();
236  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(dim != 0, "The number of dimensions in this tensor cannot be zero.");
237 
238  // Currently ArmNN supports axis 1.
239  auto aclAxis = (static_cast<T>(dim) - 1);
240  aclAxis = aclAxis > 0 ? aclAxis -1 : aclAxis;
241 
242  return aclAxis;
243 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, TensorInfo::GetNumDimensions(), and SoftmaxDescriptor::m_Axis.
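
The net effect is that the Android default axis of -1 maps to ACL axis 0, and otherwise the result depends only on the tensor rank, since ArmNN currently supports softmax over axis 1. A short sketch of the arithmetic:

    #include <cassert>

    // Mirrors ComputeSoftmaxAclAxis: -1 maps to ACL axis 0; otherwise the axis is
    // derived from the tensor rank alone.
    int SoftmaxAclAxisSketch(int armnnAxis, unsigned int rank)
    {
        if (armnnAxis == -1)
        {
            return 0; // Android default of -1 maps to the ACL default of 0.
        }
        int aclAxis = static_cast<int>(rank) - 1;
        return aclAxis > 0 ? aclAxis - 1 : aclAxis;
    }

    int main()
    {
        assert(SoftmaxAclAxisSketch(1, 4) == 2);  // rank-4 tensor, ArmNN axis 1
        assert(SoftmaxAclAxisSketch(1, 2) == 0);  // rank-2 tensor
        assert(SoftmaxAclAxisSketch(-1, 4) == 0); // Android default
    }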

◆ ComputeSplitAxis()

std::set< unsigned int > ComputeSplitAxis ( const armnn::SplitterDescriptor desc,
const TensorShape input 
)

Calculates the axis values for split operation.

Parameters
desc - Splitter descriptor
input - Input tensor shape
Returns
- A set containing the axis values of the splitter operation

Definition at line 377 of file WorkloadUtils.cpp.

378 {
379  unsigned int numSplit = desc.GetNumViews();
380  unsigned int numDimensions = desc.GetNumDimensions();
381  std::set<unsigned int> splitAxis;
382  if (desc.HasAxis())
383  {
384  splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
385  }
386  else
387  {
388  for (unsigned int i = 0; i < numSplit; ++i)
389  {
390  for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
391  {
392  if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
393  {
394  splitAxis.insert(dimIdx);
395  }
396  }
397  }
398  }
399  return splitAxis;
400 }

References ViewsDescriptor::GetAxis(), ViewsDescriptor::GetNumDimensions(), ViewsDescriptor::GetNumViews(), armnnUtils::GetUnsignedAxis(), ViewsDescriptor::GetViewSizes(), and ViewsDescriptor::HasAxis().

Referenced by ClSplitterWorkload::ClSplitterWorkload(), NeonLayerSupport::IsSplitterSupported(), ClLayerSupport::IsSplitterSupported(), and NeonSplitterWorkload::NeonSplitterWorkload().
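
When the descriptor carries no explicit axis, the split axes are inferred as every dimension in which some view's size differs from the input shape. A standalone sketch of that inference on plain vectors, with hypothetical shapes rather than a real SplitterDescriptor:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main()
    {
        // Hypothetical input shape and two views that split it along dimension 1.
        std::vector<unsigned int> inputShape = {1, 4, 8};
        std::vector<std::vector<unsigned int>> viewSizes = {{1, 2, 8}, {1, 2, 8}};

        std::set<unsigned int> splitAxis;
        for (const auto& view : viewSizes)
        {
            for (unsigned int dim = 0; dim < inputShape.size(); ++dim)
            {
                if (view[dim] != inputShape[dim])
                {
                    splitAxis.insert(dim); // a dimension that differs is a split axis
                }
            }
        }

        for (unsigned int axis : splitAxis)
        {
            std::printf("split axis: %u\n", axis); // prints "split axis: 1"
        }
    }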

◆ Concatenate()

void Concatenate ( const ConcatQueueDescriptor data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 14 of file Concatenate.cpp.

17 {
18  const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
19 
20  std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
21  Encoder<float>& encoder = *encoderPtr;
22 
23  for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = outputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i < outputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= outputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
43  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
44  inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions(),
45  "The number of output dimensions does not match the number of input dimensions.");
46 
47  // Check all dimensions to see if this element is inside the given input view.
48  bool insideView = true;
49  for (unsigned int i = 0; i < inputInfo.GetNumDimensions(); i++)
50  {
51  if (indices[i] < view.m_Origin[i])
52  {
53  insideView = false;
54  }
55  if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
56  {
57  insideView = false;
58  }
59  }
60 
61  if (insideView)
62  {
63  std::unique_ptr<Decoder<float>> decoderPtr =
64  MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
65  Decoder<float>& decoder = *decoderPtr;
66  unsigned int inIndex = 0;
67  unsigned int dimensionStride = 1;
68 
69  for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
70  {
71  inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
72  dimensionStride *= inputInfo.GetShape()[i];
73  }
74  decoder += inIndex;
75  encoder.Set(decoder.Get());
76 
77  //What should we do if input views overlap on the output tensor?
78  //We could error, take the average, or something else...
79  //For now just stop after finding first view (input) that matches.
80  break;
81  }
82  }
83  ++encoder;
84  }
85 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), ConcatQueueDescriptor::ViewOrigin::m_Origin, ConcatQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().

◆ ConditionalThrow() [1/2]

void armnn::ConditionalThrow ( bool  condition)

Definition at line 174 of file Exceptions.hpp.

175 {
176  if (!condition)
177  {
178  throw ExceptionType();
179  }
180 }

◆ ConditionalThrow() [2/2]

void armnn::ConditionalThrow ( bool  condition,
const std::string &  message 
)

Definition at line 165 of file Exceptions.hpp.

166 {
167  if (!condition)
168  {
169  throw ExceptionType(message);
170  }
171 }

◆ ConditionalThrowIfNotEqual()

void armnn::ConditionalThrowIfNotEqual ( const std::string &  message,
const ComparedType &  leftHandSide,
const ComparedType &  rightHandSide 
)

ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&).

Definition at line 189 of file Exceptions.hpp.

192 {
193  if (!(leftHandSide == rightHandSide))
194  {
195  std::stringstream ss;
196  ss << message << " : " << leftHandSide << " != " << rightHandSide;
197  throw ExceptionType(ss.str());
198  }
199 }
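
A short usage sketch (illustrative only): the exception type is a template parameter and cannot be deduced, so it must be spelled out at the call site; armnn::InvalidArgumentException is just one possible choice.

#include <armnn/Exceptions.hpp>

void ExampleConditionalThrows(const void* ptr, unsigned int lhsCount, unsigned int rhsCount)
{
    // Throws InvalidArgumentException with the given message when the condition is false.
    armnn::ConditionalThrow<armnn::InvalidArgumentException>(ptr != nullptr,
                                                             "input pointer must not be null");

    // Throws InvalidArgumentException("Element count mismatch : <lhs> != <rhs>") when the values differ.
    armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>("Element count mismatch",
                                                                       lhsCount,
                                                                       rhsCount);
}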

◆ ConfigureDetailsObject()

void armnn::ConfigureDetailsObject ( JsonChildObject detailsObject,
std::string  layerDetailsStr 
)

Definition at line 306 of file Profiling.cpp.

308 {
309  detailsObject.SetType(JsonObjectType::ExecObjectDesc);
310  detailsObject.SetAndParseDetails(layerDetailsStr);
311 
312 }

References ExecObjectDesc, JsonChildObject::SetAndParseDetails(), and JsonChildObject::SetType().

◆ ConfigureLogging()

void ConfigureLogging ( bool  printToStandardOutput,
bool  printToDebugOutput,
LogSeverity  severity 
)

Configures the logging behaviour of the ARMNN library.

Parameters
printToStandardOutput - Set to true if log messages should be printed to the standard output.
printToDebugOutput - Set to true if log messages should be printed to a platform-specific debug output (where supported).
severity - All log messages at this severity level or higher will be printed; others will be ignored.

Examples
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, and SimpleSample.cpp.

Definition at line 20 of file Utils.cpp.

21 {
22  SetAllLoggingSinks(printToStandardOutput, printToDebugOutput, false);
23  SetLogFilter(severity);
24 }

References SetAllLoggingSinks(), and SetLogFilter().

Referenced by ArmnnDevice::ArmnnDevice(), ConfigureLoggingTest(), and main().
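
A typical call at application start-up, as in the samples listed above (the chosen severity is only an example):

#include <armnn/Logging.hpp> // LogSeverity
#include <armnn/Utils.hpp>   // ConfigureLogging

int main()
{
    // Print Info (and more severe) messages to stdout; skip the platform-specific debug output.
    armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);

    // ... create the runtime, load and execute networks ...
    return 0;
}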

◆ ConfigureTuner()

void armnn::ConfigureTuner ( arm_compute::CLTuner &  tuner,
TuningLevel  level 
)
inline

Definition at line 44 of file ArmComputeTuningUtils.hpp.

45 {
46  tuner.set_tune_new_kernels(true); // Turn on tuning initially.
47 
48  switch (level)
49  {
50  case TuningLevel::Rapid:
51  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
52  tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
53  break;
54  case TuningLevel::Normal:
55  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
56  tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
57  break;
58  case TuningLevel::Exhaustive:
59  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
60  tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
61  break;
62  case TuningLevel::None:
63  default:
64  tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
65  break;
66  }
67 }

References ARMNN_LOG, Exhaustive, info, None, Normal, and Rapid.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().
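
A minimal sketch of how a tuner could be prepared before creating a backend context (it assumes the internal header ArmComputeTuningUtils.hpp, which declares TuningLevel and ConfigureTuner, is on the include path):

#include <arm_compute/runtime/CL/CLTuner.h>

void ExampleConfigureTuner()
{
    arm_compute::CLTuner tuner;

    // Enable kernel tuning in Rapid mode; TuningLevel::None would leave the tuner in "use only" mode.
    armnn::ConfigureTuner(tuner, armnn::TuningLevel::Rapid);
}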

◆ ConnectedToLayerType()

bool armnn::ConnectedToLayerType ( Layer baseLayer,
LayerType  layerType,
unsigned int  dimSize = 0 
)
inline

Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType.

If dimSize is provided, it will also check whether the connecting tensor has more than that number of dimensions.

Definition at line 271 of file SubgraphUtils.hpp.

272 {
273  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
274  TensorInfo parentTensorInfo = baseLayer->GetInputSlot(0).GetTensorInfo();
275 
276  if (parentTensorInfo.GetNumDimensions() > dimSize && parentLayer.GetType() == layerType)
277  {
278  return true;
279  }
280  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
281  {
282  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
283  TensorInfo nextTensorInfo = baseLayer->GetOutputSlot(0).GetConnection(i)->GetTensorInfo();
284 
285  if (nextTensorInfo.GetNumDimensions() > dimSize && nextLayer.GetType() == layerType)
286  {
287  return true;
288  }
289  }
290  return false;
291 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), TensorInfo::GetNumDimensions(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), and Layer::GetType().

◆ ConnectedToLayerWithNCHW()

bool armnn::ConnectedToLayerWithNCHW ( Layer baseLayer)
inline

Checks if the Layer is connected to any Layer that has an NCHW layout.

Definition at line 250 of file SubgraphUtils.hpp.

251 {
252  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
253 
254  if (IsNCHW(parentLayer))
255  {
256  return true;
257  }
258  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
259  {
260  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
261  if (IsNCHW(nextLayer))
262  {
263  return true;
264  }
265  }
266  return false;
267 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), and IsNCHW().

Referenced by NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ Convert1HWOTensorInfoToAcl()

std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl ( const TensorInfo weightInfo,
const TensorInfo inputInfo,
const DataLayout  dataLayout 
)

Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier.

Definition at line 177 of file WorkloadUtils.cpp.

180 {
181  unsigned int aclDepthMultiplier = 1;
182  TensorInfo weightsPermuted;
183  if (dataLayout == armnn::DataLayout::NHWC)
184  {
185  // No permutation required. Input and weights data layouts are the same.
186  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
187  weightsPermuted = weightInfo;
188  }
189 
190  else if (dataLayout == armnn::DataLayout::NCHW)
191  {
192  // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
193  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
194  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
195  PermutationVector permutationVector{ 0, 2, 3, 1 };
196  weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
197  }
198  else
199  {
200  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
201  GetDataLayoutName(dataLayout)));
202  }
203 
204  return std::make_tuple(weightsPermuted, aclDepthMultiplier);
205 }

References GetDataLayoutName(), TensorInfo::GetShape(), NCHW, NHWC, and armnnUtils::Permuted().
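
A worked sketch of the NHWC case (the shapes are illustrative; the function itself is declared in the internal backendsCommon/WorkloadUtils.hpp):

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <tuple>

void ExampleConvert1HWOTensorInfoToAcl()
{
    // Depthwise weights [1, H, W, I*M] = [1, 3, 3, 8] and an NHWC input with I = 4 channels.
    armnn::TensorInfo weightInfo({ 1, 3, 3, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo inputInfo({ 1, 16, 16, 4 }, armnn::DataType::Float32);

    armnn::TensorInfo aclWeightInfo;
    unsigned int depthMultiplier = 0;
    std::tie(aclWeightInfo, depthMultiplier) =
        armnn::Convert1HWOTensorInfoToAcl(weightInfo, inputInfo, armnn::DataLayout::NHWC);

    // NHWC: no permutation is needed, depthMultiplier = 8 / 4 = 2 and the shape stays [1, 3, 3, 8].
}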

◆ Convert1HWOTensorToAcl()

std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout  dataLayout,
void *  permuteBuffer 
)

Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.

Parameters
weightTensor - ConstTensorHandle of the weights tensor
inputInfo - TensorInfo of the input tensor
dataLayout - DataLayout of the input tensor
permuteBuffer - Pointer to memory with the size of the tensor, used for the permutation
Returns
- A tuple of the transformed weights ConstTensor and the depth multiplier

Definition at line 146 of file WorkloadUtils.cpp.

150 {
151  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
152  unsigned int depthMultiplier = 1;
153  PermutationVector permutationVector{};
154  if (dataLayout == armnn::DataLayout::NHWC)
155  {
156  // No permutation required. Data layouts are the same.
157 
158  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
159  }
160  else if (dataLayout == armnn::DataLayout::NCHW)
161  {
162  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
163  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
164  permutationVector = { 0, 2, 3, 1 };
165  }
166  else
167  {
168  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
169  GetDataLayoutName(dataLayout)));
170  }
171 
172  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
173 
174  return std::make_tuple(weightsPermuted, depthMultiplier);
175 }

References GetDataLayoutName(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, and PermuteTensor().

◆ Convert1HWOtoMIHW()

std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout dataLayout,
void *  permuteBuffer 
)

Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].

Parameters
weightTensor - ConstTensorHandle of the weight tensor that should be converted
inputInfo - TensorInfo of the corresponding input tensor
dataLayout - DataLayout of the input tensor, e.g. NHWC or NCHW
permuteBuffer - Memory location with the same size as the weight tensor, to write the converted data to
Returns
- A tuple of the converted weightTensor (ConstTensor) and the depthMultiplier (unsigned int)

Definition at line 208 of file WorkloadUtils.cpp.

212 {
213  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
214 
215  if (weightsInfo.HasPerAxisQuantization())
216  {
217  throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
218  "quantization is applied.");
219  }
220 
221  // Reshape weights [ 1, H, W, I*M ] --> [ H, W, I, M ]
222  auto weightsShape = weightsInfo.GetShape();
223  auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
224  unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
225  weightsInfo.SetShape({ weightsShape[1],
226  weightsShape[2],
227  inputInfo.GetShape()[channelIndex],
228  depthMultiplier});
229 
230  // Permute [ H, W, I, M ] --> [ M, I, H, W ]
231  PermutationVector permutationVector = { 2, 3, 1, 0 };
232  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
233 
234  return std::make_tuple(weightsPermuted, depthMultiplier);
235 }

References DataLayoutIndexed::GetChannelsIndex(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), TensorInfo::HasPerAxisQuantization(), PermuteTensor(), and TensorInfo::SetShape().

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [1/2]

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor activationDescPtr)
inline

Definition at line 94 of file ArmComputeUtils.hpp.

95 {
96  if (activationDescPtr != nullptr)
97  {
98  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
99  *activationDescPtr));
100  }
101  return arm_compute::ActivationLayerInfo();
102 }

References ConvertActivationDescriptorToAclActivationLayerInfo().

◆ ConvertActivationFunctionToAclActivationFunction()

arm_compute::ActivationLayerInfo::ActivationFunction armnn::ConvertActivationFunctionToAclActivationFunction ( ActivationFunction  armnnFunction)
inline

Definition at line 62 of file ArmComputeUtils.hpp.

63 {
64  using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
65 
66  switch (armnnFunction)
67  {
68  case ActivationFunction::Linear: return AclActivationFunction::LINEAR;
69  // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
70  case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC;
71  case ActivationFunction::ReLu: return AclActivationFunction::RELU;
72  case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU;
73  case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU;
74  case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU;
75  case ActivationFunction::Abs: return AclActivationFunction::ABS;
76  case ActivationFunction::Sqrt: return AclActivationFunction::SQRT;
77  case ActivationFunction::Square: return AclActivationFunction::SQUARE;
78  case ActivationFunction::TanH: return AclActivationFunction::TANH;
79  case ActivationFunction::Elu: return AclActivationFunction::ELU;
80  case ActivationFunction::HardSwish: return AclActivationFunction::HARD_SWISH;
81  case ActivationFunction::Gelu: return AclActivationFunction::GELU;
82  default: throw InvalidArgumentException("Unsupported activation function");
83  }
84 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by ConvertActivationDescriptorToAclActivationLayerInfo().

◆ ConvertAdditionalInfoToAclActivationLayerInfo()

◆ ConvertComparisonOperationToAcl()

arm_compute::ComparisonOperation armnn::ConvertComparisonOperationToAcl ( const ComparisonDescriptor descriptor)
inline

Definition at line 141 of file ArmComputeUtils.hpp.

142 {
143  switch (descriptor.m_Operation)
144  {
145  case ComparisonOperation::Greater: return arm_compute::ComparisonOperation::Greater;
146  case ComparisonOperation::GreaterOrEqual: return arm_compute::ComparisonOperation::GreaterEqual;
147  case ComparisonOperation::Less: return arm_compute::ComparisonOperation::Less;
148  case ComparisonOperation::LessOrEqual: return arm_compute::ComparisonOperation::LessEqual;
149  case ComparisonOperation::Equal: return arm_compute::ComparisonOperation::Equal;
150  case ComparisonOperation::NotEqual: return arm_compute::ComparisonOperation::NotEqual;
151  default: throw InvalidArgumentException("Unsupported comparison function");
152  }
153 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, ComparisonDescriptor::m_Operation, and NotEqual.

Referenced by ClComparisonWorkload::ClComparisonWorkload(), and NeonComparisonWorkload::NeonComparisonWorkload().

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [1/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
arm_compute::ActivationLayerInfo  activationLayerInfo 
)
inline

Definition at line 204 of file ArmComputeUtils.hpp.

206 {
207  arm_compute::FullyConnectedLayerInfo fc_info;
208  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
209  fc_info.activation_info = activationLayerInfo;
210  return fc_info;
211 }

References FullyConnectedDescriptor::m_TransposeWeightMatrix.

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [2/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
const ActivationDescriptor activationDesc 
)
inline

Definition at line 194 of file ArmComputeUtils.hpp.

196 {
197  arm_compute::FullyConnectedLayerInfo fc_info;
198  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
199  fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
200  return fc_info;
201 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), and FullyConnectedDescriptor::m_TransposeWeightMatrix.

Referenced by ClFullyConnectedWorkload::ClFullyConnectedWorkload().

◆ ConvertLogSeverity()

constexpr LogSeverity armnn::ConvertLogSeverity ( BoostLogSeverityMapping  severity)
constexpr

Definition at line 206 of file Logging.hpp.

207 {
208  return static_cast<LogSeverity>(severity);
209 }

◆ ConvertLstmActivationFuncToAclLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertLstmActivationFuncToAclLayerInfo ( uint32_t  activationFunction)
inline

Definition at line 118 of file ArmComputeUtils.hpp.

119 {
120  // For preparing the object for the class ActivationLayerInfo, we need to consider 5 situations.
121  switch (activationFunction)
122  {
123  case 0:
124  return arm_compute::ActivationLayerInfo(); // no activation, do nothing
125  case 1:
126  return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
127  case 3:
128  return arm_compute::ActivationLayerInfo(
129  arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
130  case 4:
131  return arm_compute::ActivationLayerInfo(
132  arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
133  case 6:
134  return arm_compute::ActivationLayerInfo(
135  arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
136  default:
137  throw armnn::Exception("Wrong Type of Activation Function!");
138  }
139 }

◆ ConvertMaskToACLFormat()

int32_t ConvertMaskToACLFormat ( int32_t  mask,
int32_t  numDim 
)

Definition at line 299 of file WorkloadUtils.cpp.

300 {
301  int32_t reversedMask = 0;
302  for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
303  {
304  // Check if bit set in mask for each dimension
305  int32_t bit = (mask & 1 << i) != 0;
306  // Increment the new mask with the bits reversed
307  reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
308  }
309 
310  return reversedMask;
311 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload(), and NeonStridedSliceWorkload::NeonStridedSliceWorkload().
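
A small worked example of the bit reversal (the values are illustrative; the function is declared in the internal backendsCommon/WorkloadUtils.hpp):

#include <cassert>
#include <cstdint>

void ExampleConvertMaskToAclFormat()
{
    // A strided-slice mask with bits 0 and 1 set, applied to a 4-D tensor.
    const int32_t mask = 0b0011;

    // Bit i moves to position (numDim - 1 - i): bit 0 -> 3 and bit 1 -> 2,
    // so the reversed mask is 0b1100 = 12.
    const int32_t reversed = armnn::ConvertMaskToACLFormat(mask, /*numDim=*/4);
    assert(reversed == 12);
}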

◆ ConvertNormalizationAlgorithmChannelToAclNormType()

arm_compute::NormType armnn::ConvertNormalizationAlgorithmChannelToAclNormType ( NormalizationAlgorithmChannel  channelType)
inline

Definition at line 182 of file ArmComputeUtils.hpp.

183 {
184  using arm_compute::NormType;
185  switch (channelType)
186  {
187  case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
188  case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
189  default: throw InvalidArgumentException("Unsupported normalization algorithm channel type");
190  }
191 }

References Across, and Within.

◆ ConvertOutputShapeRoundingToAclDimensionRoundingType()

arm_compute::DimensionRoundingType armnn::ConvertOutputShapeRoundingToAclDimensionRoundingType ( OutputShapeRounding  rounding)
inline

Definition at line 168 of file ArmComputeUtils.hpp.

170 {
171  using arm_compute::DimensionRoundingType;
172 
173  switch (rounding)
174  {
175  case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL;
176  case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR;
177  default: throw InvalidArgumentException("Unsupported Output Shape Rounding type");
178  }
179 }

References Ceiling, and Floor.

◆ ConvertPaddingModeToAcl()

arm_compute::PaddingMode armnn::ConvertPaddingModeToAcl ( const PaddingMode paddingMode)
inline

Definition at line 295 of file ArmComputeUtils.hpp.

296 {
297  switch (paddingMode)
298  {
299  case PaddingMode::Constant: return arm_compute::PaddingMode::CONSTANT;
300  case PaddingMode::Reflect: return arm_compute::PaddingMode::REFLECT;
301  case PaddingMode::Symmetric: return arm_compute::PaddingMode::SYMMETRIC;
302  default: throw InvalidArgumentException("Unsupported Padding Mode");
303  }
304 }

References Constant, Reflect, and Symmetric.

◆ ConvertPoolingAlgorithmToAclPoolingType()

arm_compute::PoolingType armnn::ConvertPoolingAlgorithmToAclPoolingType ( PoolingAlgorithm  poolingAlgorithm)
inline

Definition at line 155 of file ArmComputeUtils.hpp.

156 {
157  using arm_compute::PoolingType;
158 
159  switch (poolingAlgorithm)
160  {
161  case PoolingAlgorithm::Max: return PoolingType::MAX;
162  case PoolingAlgorithm::Average: return PoolingType::AVG;
163  case PoolingAlgorithm::L2: return PoolingType::L2;
164  default: throw InvalidArgumentException("Unsupported pooling algorithm");
165  }
166 }

References Average, L2, and Max.

Referenced by CreatePool2dAttributes().

◆ ConvertReductionOperationToAcl()

arm_compute::ReductionOperation armnn::ConvertReductionOperationToAcl ( const ReduceDescriptor descriptor)
inline

Definition at line 306 of file ArmComputeUtils.hpp.

307 {
308  switch (descriptor.m_ReduceOperation)
309  {
310  case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM;
311  case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM;
312  case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX;
313  case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN;
314  case ReduceOperation::Prod: return arm_compute::ReductionOperation::PROD;
315  default: throw InvalidArgumentException("Unsupported Reduction operation");
316  }
317 }

References ReduceDescriptor::m_ReduceOperation, Max, Mean, Min, Prod, and Sum.

◆ ConvertResizeMethodToAclInterpolationPolicy()

arm_compute::InterpolationPolicy armnn::ConvertResizeMethodToAclInterpolationPolicy ( ResizeMethod  resizeMethod)
inline

Definition at line 213 of file ArmComputeUtils.hpp.

214 {
215  switch (resizeMethod)
216  {
217  case ResizeMethod::Bilinear:
218  return arm_compute::InterpolationPolicy::BILINEAR;
219  case ResizeMethod::NearestNeighbor:
220  return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
221  default:
222  throw InvalidArgumentException("Unsupported resize method");
223  }
224 }

References Bilinear, and NearestNeighbor.

◆ ConvertWeightTensorFromArmnnToAcl()

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl ( const ConstTensorHandle weightTensor,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 237 of file WorkloadUtils.cpp.

240 {
241  if (weightTensor == nullptr)
242  {
243  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
244  }
245  if (permuteBuffer == nullptr)
246  {
247  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
248  }
249 
250  auto multiplier = weightTensor->GetTensorInfo().GetShape()[0];
251  auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];
252 
253  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
254  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
255 
256  // 1. Permute the weights if necessary
257  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
258  // starting from the current shape of [ M, I, H, W ]
259  // If no permutation is necessary, leave the permutation vector empty
260  PermutationVector permutationVector{};
261  if (dataLayout == DataLayout::NHWC)
262  {
263  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
264  permutationVector = { 3, 2, 0, 1 };
265  }
266  ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
267 
268  // Shuffle the weights data to obtain the channel order needed by ACL
269  if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
270  {
271  switch (weightPermuted.GetDataType())
272  {
273  case DataType::Float32:
274  weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
275  break;
276  case DataType::Float16:
277  weightPermuted =
278  ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
279  break;
280  case DataType::QAsymmS8:
281  case DataType::QAsymmU8:
282  weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
283  break;
284  case DataType::QSymmS8:
285  weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
286  break;
287  default:
288  break;
289  }
290  }
291 
292  // 2. Reshape the weights
293  ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
294 
295  // 3. Return both the tensor and the allocated storage to ensure that the data stays alive
296  return weightPermuted;
297 }

References Float16, Float32, BaseTensor< MemoryType >::GetDataType(), BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, PermuteTensor(), QAsymmS8, QAsymmU8, QSymmS8, and ReshapeWeightsForAcl().

◆ ConvertWeightTensorInfoFromArmnnToAcl()

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl ( const TensorInfo weightInfo,
DataLayout  dataLayout 
)

Definition at line 122 of file WorkloadUtils.cpp.

123 {
124  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
125  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
126 
127  // 1. Permute the weights if necessary
128  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
129  // starting from the current shape of [ M, I, H, W ]
130  TensorInfo weightPermutedInfo(weightInfo);
131  if (dataLayout == DataLayout::NHWC)
132  {
133  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
134  PermutationVector permutationVector{ 3, 2, 0, 1 };
135  weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
136  }
137 
138  // 2. Reshape the weights
139  ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
140 
141  // 3. Return the permuted weight info
142  return weightPermutedInfo;
143 }

References NHWC, armnnUtils::Permuted(), and ReshapeWeightsForAcl().
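
A worked sketch of the NHWC shape transformation (the shapes are illustrative; the function lives in the internal backendsCommon/WorkloadUtils.hpp):

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

void ExampleConvertWeightTensorInfo()
{
    // ArmNN depthwise weights are [M, I, H, W]; here M = 2, I = 3, H = W = 5.
    armnn::TensorInfo weightInfo({ 2, 3, 5, 5 }, armnn::DataType::Float32);

    // For NHWC the info is permuted to [H, W, I, M] = [5, 5, 3, 2] and then
    // reshaped to [1, H, W, I*M] = [1, 5, 5, 6], as required by the compute library.
    armnn::TensorInfo aclInfo =
        armnn::ConvertWeightTensorInfoFromArmnnToAcl(weightInfo, armnn::DataLayout::NHWC);
}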

◆ Convolve()

void Convolve ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  xDilation,
unsigned int  yDilation,
bool  depthwise 
)

Definition at line 68 of file ConvImpl.cpp.

84 {
85  if (biasEnabled && !pBiasDecoder)
86  {
87  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
88  }
89  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
90 
91  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
92  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
93  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
94 
95  // Weights layout:
96  // Conv2d: [O,H,W,I]
97  // Depthwise: [1,H,W,O]
98  const unsigned int inputChannels = rInputShape[channelsIndex];
99  const unsigned int outputChannels = rOutputShape[channelsIndex];
100  const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
101 
102  const unsigned int batchSize = rOutputShape[0];
103  const unsigned int outputHeight = rOutputShape[heightIndex];
104  const unsigned int outputWidth = rOutputShape[widthIndex];
105  const unsigned int inputHeight = rInputShape[heightIndex];
106  const unsigned int inputWidth = rInputShape[widthIndex];
107 
108  const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
109  const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
110 
111  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
112  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
113 
114  const TensorShape biasShape{outputChannels};
115  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
116 
117  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
118  {
119  for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
120  {
121  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
122  {
123  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
124  {
125  // This loop goes over each output element.
126  float sum = 0.0f;
127 
128  // For depthwise, each output channel corresponds to exactly one input channel.
129  // For normal, must loop over each input channel.
130  for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
131  {
132  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
133  {
134  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
135  {
136  // This loop goes over each input element for each output element.
137  unsigned int filterIndex = 0;
138 
139  // Since dimensionality of kernel depends on depthwiseness, so does index.
140  if (depthwise)
141  {
142  cInput = cOutput / depthMultiplier;
143  // filterDepth = outputChannels;
144  filterIndex = xFilter * outputChannels + cOutput +
145  yFilter * filterWidth * outputChannels;
146  }
147  else
148  {
149  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
150  // performance regression.
151  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
152  {
153  filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
154  yFilter * filterWidth * inputChannels +
155  xFilter * inputChannels +
156  cInput;
157  }
158  else
159  {
160  filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
161  cInput * filterWidth * filterHeight +
162  yFilter * filterWidth +
163  xFilter;
164  }
165  }
166 
167  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
168  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
169 
170  float inputValue;
171 
172  // Check if we're in the padding.
173  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
174  xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
175  {
176  inputValue = 0.0f;
177  }
178  else
179  {
180  unsigned int inputIndex = 0;
181 
182  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
183  // performance regression.
184  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
185  {
186  inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
187  (yInput - paddingTop) * inputWidth * inputChannels +
188  (xInput - paddingLeft) * inputChannels +
189  cInput;
190  }
191  else
192  {
193  inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
194  inputWidth * inputHeight * cInput +
195  inputWidth * (yInput - paddingTop) +
196  xInput - paddingLeft;
197  }
198  inputValue = inputVec[inputIndex];
199  }
200 
201  sum += filterVec[filterIndex] * inputValue;
202  }
203  }
204  }
205 
206  if (biasEnabled)
207  {
208  sum += biasVec[cOutput];
209  }
210 
211  unsigned int outIdx;
212  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
213  {
214  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
215  yOutput * outputWidth * outputChannels +
216  xOutput * outputChannels +
217  cOutput;
218  }
219  else
220  {
221  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
222  cOutput * outputHeight * outputWidth +
223  yOutput * outputWidth +
224  xOutput;
225  }
226 
227  rOutputEncoder[outIdx];
228  rOutputEncoder.Set(sum);
229  }
230  }
231  }
232  }
233 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NHWC, and Encoder< IType >::Set().

◆ Convolve3d()

void Convolve3d ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  paddingFront,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  zStride,
unsigned int  xDilation,
unsigned int  yDilation,
unsigned int  zDilation 
)

Definition at line 11 of file Conv3dImpl.cpp.

29 {
30  if (biasEnabled && !pBiasDecoder)
31  {
32  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
33  }
34  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
35 
36  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
37  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
38  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
39  const unsigned int depthIndex = dataLayoutIndexed.GetDepthIndex();
40 
41  const unsigned int inChannels = rInputShape[channelsIndex];
42  const unsigned int outChannels = rOutputShape[channelsIndex];
43 
44  const unsigned int batchSize = rOutputShape[0];
45  const unsigned int outputHeight = rOutputShape[heightIndex];
46  const unsigned int outputWidth = rOutputShape[widthIndex];
47  const unsigned int outputDepth = rOutputShape[depthIndex];
48  const unsigned int inputHeight = rInputShape[heightIndex];
49  const unsigned int inputWidth = rInputShape[widthIndex];
50  const unsigned int inputDepth = rInputShape[depthIndex];
51 
52  // Conv3d weights layout: [D,H,W,I,O]
53  const unsigned int filterDepth = rFilterShape[0];
54  const unsigned int filterHeight = rFilterShape[1];
55  const unsigned int filterWidth = rFilterShape[2];
56 
57  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
58  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);
59 
60  const TensorShape biasShape{outChannels};
61  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
62 
63  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
64  {
65  for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)
66  {
67  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
68  {
69  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
70  {
71  for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)
72  {
73  // This loop goes over each output element.
74  float sum = 0.0f;
75 
76  // Loop over each input channel.
77  for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)
78  {
79  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
80  {
81  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
82  {
83  for (unsigned int cInput = 0; cInput < inChannels; cInput++)
84  {
85  // This loop goes over each input element for each output element.
86  unsigned int filterIndex = 0;
87 
88  // Conv3d weights layout: [D,H,W,I,O]
89  // Keep this implementation, as using DataLayoutIndexed::GetIndex
90  // causes large performance regression.
91  filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +
92  yFilter * filterWidth * inChannels * outChannels +
93  xFilter * inChannels * outChannels +
94  cInput * outChannels +
95  cOutput;
96 
97  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
98  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
99  unsigned int zInput = zOutput * zStride + zFilter * zDilation;
100 
101  float inputValue;
102 
103  // Check if we're in the padding.
104  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
105  xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||
106  zInput < paddingFront || zInput >= inputDepth + paddingFront)
107  {
108  inputValue = 0.0f;
109  }
110  else
111  {
112  unsigned int inputIndex = 0;
113 
114  // Keep this implementation, as using DataLayoutIndexed::GetIndex
115  // causes large performance regression.
116  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
117  {
118  inputIndex =
119  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
120  (zInput-paddingFront) * inputHeight * inputWidth * inChannels +
121  (yInput-paddingTop) * inputWidth * inChannels +
122  (xInput-paddingLeft) * inChannels +
123  cInput;
124  }
125  else
126  {
127  // NCDHW DataLayout
128  inputIndex =
129  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
130  inputDepth * inputHeight * inputWidth * cInput +
131  (zInput-paddingFront) * inputHeight * inputWidth +
132  (yInput-paddingTop) * inputWidth +
133  xInput-paddingLeft;
134  }
135 
136  inputValue = inputVec[inputIndex];
137  }
138 
139  sum += filterVec[filterIndex] * inputValue;
140  }
141  }
142  }
143  }
144 
145  if (biasEnabled)
146  {
147  sum += biasVec[cOutput];
148  }
149 
150  unsigned int outIdx;
151  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
152  {
153  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
154  zOutput * outputHeight * outputWidth * outChannels +
155  yOutput * outputWidth * outChannels +
156  xOutput * outChannels +
157  cOutput;
158  }
159  else
160  {
161  // NCDHW DataLayout
162  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
163  cOutput * outputDepth * outputHeight * outputWidth +
164  zOutput * outputHeight * outputWidth +
165  yOutput * outputWidth +
166  xOutput;
167  }
168 
169  rOutputEncoder[outIdx];
170  rOutputEncoder.Set(sum);
171  }
172  }
173  }
174  }
175  }
176 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NDHWC, and Encoder< IType >::Set().

◆ CoordinatesToIndex()

uint32_t armnn::CoordinatesToIndex ( TensorShape shape,
std::vector< uint32_t > &  coordinates 
)

Definition at line 32 of file Tile.cpp.

33 {
34  uint32_t index = 0;
35  uint32_t base = 1;
36  uint32_t rank = shape.GetNumDimensions();
37  for (uint32_t i = rank; i > 0; --i)
38  {
39  index = index + coordinates[i - 1] * base;
40  base = base * shape[i - 1];
41  }
42  return index;
43 }

References TensorShape::GetNumDimensions().

Referenced by Tile().
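
A worked example of the row-major flattening (the values are illustrative; the function is declared alongside Tile in the reference backend):

#include <armnn/Tensor.hpp>
#include <cstdint>
#include <vector>

uint32_t ExampleCoordinatesToIndex()
{
    armnn::TensorShape shape({ 2, 3, 4 });
    std::vector<uint32_t> coordinates{ 1, 2, 3 };

    // index = 3 + 2*4 + 1*(3*4) = 23.
    return armnn::CoordinatesToIndex(shape, coordinates);
}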

◆ CopyArmComputeClTensorData()

void CopyArmComputeClTensorData ( arm_compute::CLTensor &  dstTensor,
const T *  srcData 
)

Definition at line 64 of file ClWorkloadUtils.hpp.

65 {
66  {
67  ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
68  dstTensor.map(true);
69  }
70 
71  {
72  ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
73  armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
74  }
75 
76  dstTensor.unmap();
77 }

References ARMNN_SCOPED_PROFILING_EVENT_CL.

Referenced by ClConstantWorkload::Execute(), and GpuFsaConstantWorkload::Execute().

◆ CopyArmComputeTensorData()

void armnn::CopyArmComputeTensorData ( arm_compute::Tensor &  dstTensor,
const T *  srcData 
)

Definition at line 63 of file NeonWorkloadUtils.hpp.

64 {
65  InitialiseArmComputeTensorEmpty(dstTensor);
66  CopyArmComputeITensorData(srcData, dstTensor);
67 }

Referenced by InitializeArmComputeTensorData().

◆ CopyTensorContentsGeneric()

void armnn::CopyTensorContentsGeneric ( const ITensorHandle srcTensor,
ITensorHandle dstTensor,
CopyFunc  copy 
)

Definition at line 46 of file WorkloadUtils.hpp.

47 {
48  // For ease of understanding, names are assigned to the dimensions
49  // of the tensor as if NHWC, however this routine works with any 5D tensor
50  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52  TensorShape srcStrides = srcTensor->GetStrides();
53  const TensorShape& srcShape = srcTensor->GetShape();
54  const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
55  TensorShape dstStrides = dstTensor->GetStrides();
56  const TensorShape& dstShape = dstTensor->GetShape();
57  const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
58 
59  size_t srcDepth = 1;
60  size_t srcBatches = 1;
61  size_t srcHeight = 1;
62  size_t srcWidth = 1;
63  size_t srcChannels = 1;
64  AssignValues(srcShape.GetNumDimensions(),
65  0,
66  srcShape,
67  srcChannels,
68  srcWidth,
69  srcHeight,
70  srcBatches,
71  srcDepth);
72 
73  size_t srcDepthStride = 0;
74  size_t srcBatchStride = 0;
75  size_t srcHeightStride = 0;
76  size_t srcWidthStride = 0;
77  size_t srcChannelStride = 0;
78  AssignValues(srcStrides.GetNumDimensions(),
79  0,
80  srcStrides,
81  srcChannelStride,
82  srcWidthStride,
83  srcHeightStride,
84  srcBatchStride,
85  srcDepthStride);
86 
87  size_t dstDepth = 1;
88  size_t dstBatches = 1;
89  size_t dstHeight = 1;
90  size_t dstWidth = 1;
91  size_t dstChannels = 1;
92  AssignValues(dstShape.GetNumDimensions(),
93  0,
94  dstShape,
95  dstChannels,
96  dstWidth,
97  dstHeight,
98  dstBatches,
99  dstDepth);
100 
101  size_t dstDepthStride = 0;
102  size_t dstBatchStride = 0;
103  size_t dstHeightStride = 0;
104  size_t dstWidthStride = 0;
105  size_t dstChannelStride = 0;
106  AssignValues(dstStrides.GetNumDimensions(),
107  0,
108  dstStrides,
109  dstChannelStride,
110  dstWidthStride,
111  dstHeightStride,
112  dstBatchStride,
113  dstDepthStride);
114 
115  const unsigned char* srcDataStart;
116  unsigned char* dstDataStart;
117  {
118  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
119  srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
120  dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
121  }
122  if (srcDataStart == nullptr)
123  {
124  throw MemoryValidationException("The source tensor is null.");
125  }
126  if (dstDataStart == nullptr)
127  {
128  throw MemoryValidationException("The destination tensor is null.");
129  }
130 
131  size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
132  size_t copyWidth = std::min(srcWidth, dstWidth);
133  size_t copyHeight = std::min(srcHeight, dstHeight);
134  size_t copyBatches = std::min(srcBatches, dstBatches);
135  size_t copyDepth = std::min(srcDepth, dstDepth);
136 
137  // Coalesce inner dimensions where possible
139  // to reduce overhead calling copy() and to
139  // allow for memory bandwidth optimisations
140  if (copyLength == srcWidthStride &&
141  copyLength == dstWidthStride)
142  {
143  // There is no special padding between rows,
144  // and sizes are compatible, so copy whole rows
145  copyLength *= copyWidth;
146  copyWidth = 1;
147 
148  if (copyLength == srcHeightStride &&
149  copyLength == dstHeightStride)
150  {
151  // There is no special padding between batches
152  // and sizes are compatible so copy whole batches
153  copyLength *= copyHeight;
154  copyHeight = 1;
155  }
156  }
157 
158  const unsigned char* srcData = srcDataStart;
159  unsigned char* dstData = dstDataStart;
160  for (unsigned int d = 0; d < copyDepth; ++d)
161  {
162  auto srcPtrDepth = srcData;
163  auto dstPtrDepth = dstData;
164  for (unsigned int b = 0; b < copyBatches; ++b)
165  {
166  auto srcPtrBatch = srcData;
167  auto dstPtrBatch = dstData;
168  for (unsigned int h = 0; h < copyHeight; ++h)
169  {
170  auto srcPtrChannel = srcData;
171  auto dstPtrChannel = dstData;
172  for (unsigned int w = 0; w < copyWidth; ++w)
173  {
174  // Sanity check the memory area we've been asked to copy from and to.
175  if (copyLength > srcSize)
176  {
177  throw MemoryValidationException(
178  "The source tensor size does not match the size of the allocated tensor.");
179  }
180  if (copyLength > dstSize)
181  {
182  throw MemoryValidationException(
183  "The destination tensor size will overrun the destination tensor.");
184  }
185  copy(dstData, srcData, copyLength);
186  dstData += dstWidthStride;
187  srcData += srcWidthStride;
188  }
189  dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
190  srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
191  }
192  dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
193  srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
194  }
195  dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
196  srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
197  }
198 
199  srcTensor->Unmap();
200  dstTensor->Unmap();
201 }

References ARMNN_SCOPED_PROFILING_EVENT, TensorShape::GetNumDimensions(), ITensorHandle::GetShape(), ITensorHandle::GetStrides(), ITensorHandle::Map(), MaxNumOfTensorDimensions, Undefined, and ITensorHandle::Unmap().

Referenced by CopyToOutputTensor(), CopyMemGenericWorkload::Execute(), NeonConvertFp32ToFp16Workload::Execute(), NeonConvertFp16ToFp32Workload::Execute(), and CopyMemGenericWorkload::ExecuteAsync().
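
A minimal sketch mirroring the CopyToOutputTensor() implementation in the next entry: the copy functor is a plain memcpy and both tensor handles are assumed to be mappable.

#include <armnn/backends/ITensorHandle.hpp>
#include <cstring>

void ExampleCopyTensorContents(const armnn::ITensorHandle* src, armnn::ITensorHandle* dst)
{
    auto copyFunc = [](void* dstPtr, const void* srcPtr, size_t size)
    {
        std::memcpy(dstPtr, srcPtr, size);
    };

    // Maps both handles, copies row by row (coalescing rows/batches where the strides allow), then unmaps.
    armnn::CopyTensorContentsGeneric(src, dst, copyFunc);
}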

◆ CopyToOutputTensor()

void armnn::CopyToOutputTensor ( const Tensor outputTensor,
ITensorHandle outputTensorHandle 
)

Definition at line 1444 of file LoadedNetwork.cpp.

1445 {
1446  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
1447  auto copyFunc = [](void* dst, const void* src, size_t size)
1448  {
1449  memcpy(dst, src, size);
1450  };
1451 
1452  std::unique_ptr<ITensorHandle> tensorHandle =
1453  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1454  outputTensor.GetMemoryArea());
1455 
1456  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1457 }

References ARMNN_SCOPED_PROFILING_EVENT, CopyTensorContentsGeneric(), BaseTensor< MemoryType >::GetInfo(), BaseTensor< MemoryType >::GetMemoryArea(), and Undefined.

Referenced by LoadedNetwork::Execute().

◆ CreateAclNormalizationLayerInfoForL2Normalization()

arm_compute::NormalizationLayerInfo armnn::CreateAclNormalizationLayerInfoForL2Normalization ( const armnn::TensorInfo tensorInfo,
armnn::DataLayout  dataLayout 
)
inline

Definition at line 29 of file ArmComputeUtils.hpp.

31 {
32  unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
33  const unsigned int depth = tensorInfo.GetShape()[depthDimension];
34 
35  // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
36  // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
37  // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully choosing the normalization
38  // parameters.
39  //
40  // Please refer to both the reference implementation of the normalization layer and the implementation of
41  // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
42 
43  // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
44  // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
45  // ACL's normalization_layer_cross_map() CL function.
46  const uint32_t normSize = depth * 2u + 1u;
47 
48  // See ACL's NormalizationLayerInfo::scale_coeff() definition.
49  // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
50  const float alpha = 1.0f;
51 
52  // Don't offset the reduction.
53  const float kappa = 0.0f;
54 
55  // pow(reduction, -0.5) = 1 / sqrt(reduction)
56  const float beta = 0.5f;
57 
58  return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
59 }

References TensorInfo::GetShape(), and NCHW.

◆ CreateClContext()

flatbuffers::Offset<ClContext> armnn::CreateClContext ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>>  programs = 0 
)
inline

Definition at line 57 of file ClContextSchema_generated.h.

59  {
60  ClContextBuilder builder_(_fbb);
61  builder_.add_programs(programs);
62  return builder_.Finish();
63 }

References ClContextBuilder::add_programs(), and ClContextBuilder::Finish().

Referenced by CreateClContextDirect(), and ClContextSerializer::Serialize().

◆ CreateClContextDirect()

flatbuffers::Offset<ClContext> armnn::CreateClContextDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const std::vector< flatbuffers::Offset< armnn::Program >> *  programs = nullptr 
)
inline

Definition at line 65 of file ClContextSchema_generated.h.

67  {
68  auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<armnn::Program>>(*programs) : 0;
69  return armnn::CreateClContext(
70  _fbb,
71  programs__);
72 }

References CreateClContext().

◆ CreateDescriptorForConcatenation()

OriginsDescriptor armnn::CreateDescriptorForConcatenation ( TensorShapeIt  first,
TensorShapeIt  last,
unsigned int  concatenationDimension 
)

Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.

Definition at line 300 of file Descriptors.hpp.

303 {
304  auto numInputs = std::distance(first, last);
305 
306  if (numInputs < 2)
307  {
308  throw InvalidArgumentException("Concatenation requires at least 2 inputs");
309  }
310 
311  const auto& firstInputShape = *first;
312 
313  const unsigned int numDimensions = firstInputShape.GetNumDimensions();
314  for (auto it = first + 1; it != last; ++it)
315  {
316  if (it->GetNumDimensions() != numDimensions)
317  {
318  throw InvalidArgumentException("All inputs to concatenation must have the same number of dimensions");
319  }
320  }
321 
322  if (concatenationDimension >= numDimensions)
323  {
324  throw InvalidArgumentException("concatenationDimension must be between 0 and the number of dimensions.");
325  }
326 
327  for (auto it = first; it != last; ++it)
328  {
329  for (unsigned int d = 0; d < numDimensions; ++d)
330  {
331  const bool dimSizeOk = (d == concatenationDimension) || (firstInputShape[d] == (*it)[d]);
332  if (!dimSizeOk)
333  {
334  throw InvalidArgumentException("All inputs to concatenation must be the same size along all dimensions "
335  " except the concatenation dimension");
336  }
337  }
338  }
339 
340  OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
341  viewsDescriptor.SetConcatAxis(concatenationDimension);
342 
343  uint32_t viewIndex = 0u;
344  uint32_t coordAlongConcatDim = 0u;
345  for (auto it = first; it != last; ++it)
346  {
347  const auto& inputShape = *it;
348 
349  for (unsigned int i = 0; i < concatenationDimension; ++i)
350  {
351  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
352  }
353 
354  viewsDescriptor.SetViewOriginCoord(viewIndex, concatenationDimension, coordAlongConcatDim);
355  unsigned int dimSize = inputShape[concatenationDimension];
356  coordAlongConcatDim += dimSize;
357 
358 
359  for (unsigned int i = concatenationDimension + 1; i < numDimensions; ++i)
360  {
361  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
362  }
363 
364  ++viewIndex;
365  }
366 
367  return viewsDescriptor;
368 }

References OriginsDescriptor::SetConcatAxis(), and OriginsDescriptor::SetViewOriginCoord().
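
A typical use when concatenating two tensors along a given dimension (the shapes are illustrative):

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <vector>

armnn::OriginsDescriptor ExampleConcatDescriptor()
{
    // Concatenate a [2, 3, 4] and a [2, 5, 4] tensor along dimension 1 -> output [2, 8, 4].
    std::vector<armnn::TensorShape> shapes{ armnn::TensorShape({ 2, 3, 4 }),
                                            armnn::TensorShape({ 2, 5, 4 }) };

    // View origins become (0,0,0) and (0,3,0); the concat axis is set to 1.
    return armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), 1);
}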

◆ CreateInputsFrom()

SubgraphView::InputSlots armnn::CreateInputsFrom ( Layer layer)
inline

Definition at line 42 of file GpuFsaBackend.cpp.

43 {
44  SubgraphView::InputSlots result;
45  for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
46  {
47  result.push_back(&(*it));
48  }
49  return result;
50 }

References Layer::BeginInputSlots(), and Layer::EndInputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateOutputsFrom()

SubgraphView::OutputSlots armnn::CreateOutputsFrom ( Layer layer)
inline

Definition at line 52 of file GpuFsaBackend.cpp.

53 {
54  SubgraphView::OutputSlots result;
55  for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
56  {
57  result.push_back(&(*it));
58  }
59  return result;
60 }

References Layer::BeginOutputSlots(), and Layer::EndOutputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateProgram()

flatbuffers::Offset<Program> armnn::CreateProgram ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::String >  name = 0,
flatbuffers::Offset< flatbuffers::Vector< uint8_t >>  binary = 0 
)
inline

Definition at line 118 of file ClContextSchema_generated.h.

121  {
122  ProgramBuilder builder_(_fbb);
123  builder_.add_binary(binary);
124  builder_.add_name(name);
125  return builder_.Finish();
126 }

References ProgramBuilder::add_binary(), ProgramBuilder::add_name(), and ProgramBuilder::Finish().

Referenced by CreateProgramDirect(), and ClContextSerializer::Serialize().

◆ CreateProgramDirect()

flatbuffers::Offset<Program> armnn::CreateProgramDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const char *  name = nullptr,
const std::vector< uint8_t > *  binary = nullptr 
)
inline

Definition at line 128 of file ClContextSchema_generated.h.

131  {
132  auto name__ = name ? _fbb.CreateString(name) : 0;
133  auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
134  return armnn::CreateProgram(
135  _fbb,
136  name__,
137  binary__);
138 }

References CreateProgram().
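
A sketch of how these generated helpers could be combined to serialize a single cached program, loosely following what ClContextSerializer::Serialize() does; the program name and binary below are placeholders.

#include <flatbuffers/flatbuffers.h>
#include <cstdint>
#include <vector>
// Assumes the generated header ClContextSchema_generated.h is on the include path.

std::vector<uint8_t> ExampleSerializeClContext(const std::vector<uint8_t>& kernelBinary)
{
    flatbuffers::FlatBufferBuilder fbb;

    // One cached program, identified by a placeholder name.
    auto program = armnn::CreateProgramDirect(fbb, "example_program", &kernelBinary);

    std::vector<flatbuffers::Offset<armnn::Program>> programs{ program };
    auto context = armnn::CreateClContextDirect(fbb, &programs);

    fbb.Finish(context);
    return { fbb.GetBufferPointer(), fbb.GetBufferPointer() + fbb.GetSize() };
}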

◆ CreateSubgraphViewFrom()

SubgraphView::SubgraphViewPtr armnn::CreateSubgraphViewFrom ( SubgraphView::InputSlots &&  inputs,
SubgraphView::OutputSlots &&  outputs,
SubgraphView::Layers &&  layers 
)
inline

Definition at line 62 of file GpuFsaBackend.cpp.

65 {
66  return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
67 }

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateSupportedBackends()

BackendsMap CreateSupportedBackends ( TensorHandleFactoryRegistry handleFactoryRegistry,
BackendSettings backendSettings 
)

Definition at line 1302 of file Network.cpp.

1304 {
1305  BackendsMap backends;
1306  auto const& backendRegistry = BackendRegistryInstance();
1307  for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
1308  {
1309  auto backendFactory = backendRegistry.GetFactory(selectedBackend);
1310  auto backendObjPtr = backendFactory();
1311 
1312  backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
1313 
1314  backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
1315  }
1316 
1317  return backends;
1318 }

References BackendRegistryInstance(), and BackendSettings::m_SupportedBackends.

Referenced by Optimize().

◆ Debug()

void Debug ( const TensorInfo &  inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

Definition at line 97 of file Debug.cpp.

103 {
104  if (outputsToFile)
105  {
106 #if !defined(ARMNN_DISABLE_FILESYSTEM)
107  fs::path tmpDir = fs::temp_directory_path();
108  std::ofstream out(tmpDir.generic_string() + "/ArmNNIntermediateLayerOutputs/" + layerName + ".numpy");
109  PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, out);
110  out.close();
111 #endif
112  }
113  else
114  {
115  PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, std::cout);
116  }
117 }

◆ Debug< BFloat16 >()

template void armnn::Debug< BFloat16 > ( const TensorInfo &  inputInfo,
const BFloat16 *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< float >()

template void armnn::Debug< float > ( const TensorInfo &  inputInfo,
const float *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< Half >()

template void armnn::Debug< Half > ( const TensorInfo &  inputInfo,
const Half *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int16_t >()

template void armnn::Debug< int16_t > ( const TensorInfo &  inputInfo,
const int16_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int32_t >()

template void armnn::Debug< int32_t > ( const TensorInfo &  inputInfo,
const int32_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int64_t >()

template void armnn::Debug< int64_t > ( const TensorInfo &  inputInfo,
const int64_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int8_t >()

template void armnn::Debug< int8_t > ( const TensorInfo &  inputInfo,
const int8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< uint8_t >()

template void armnn::Debug< uint8_t > ( const TensorInfo &  inputInfo,
const uint8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ DeleteAsType()

void DeleteAsType ( const void *const  blob)
inline

Definition at line 37 of file GpuFsaBackend.cpp.

38 {
39  delete static_cast<const T*>(blob);
40 }

◆ DepthToSpace()

void DepthToSpace ( const TensorInfo &  inputInfo,
const DepthToSpaceDescriptor &  descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 16 of file DepthToSpace.cpp.

21 {
22  const unsigned int blockSize = descriptor.m_BlockSize;
23 
24  const TensorShape& inputShape = inputInfo.GetShape();
25  const unsigned int batches = inputShape[0];
26 
27  armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
28  const unsigned int inDepth = inputShape[dataLayoutIndexed.GetChannelsIndex()];
29  const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
30  const unsigned int inWidth = inputShape[dataLayoutIndexed.GetWidthIndex()];
31 
32  const unsigned int outDepth = inDepth / (blockSize * blockSize);
33 
34  // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
35  //
36  // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
37  // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
38  //
39  // DepthToSpace can then be implemented as a permutation in 6D resulting in
40  // the following shapes:
41  //
42  // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
43  // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
44  //
45  // NOTE:
46  // Since 6D tensors are not currently supported, in practice we need to handle each
47  // batch separately and execute 5D permutations
48 
49  TensorShape permDestShape;
50  PermutationVector permVector{};
51  if (descriptor.m_DataLayout == DataLayout::NCHW)
52  {
53  permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
54  permVector = { 2, 4, 0, 1, 3 };
55  }
56  else
57  {
58  permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
59  permVector = { 0, 2, 1, 3, 4 };
60  }
61 
62  const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
63 
64  for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
65  {
66  const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
67 
68  armnnUtils::Permute(permDestShape,
69  permVector,
70  static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
71  static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
72  dataTypeSize);
73  }
74 }

References DepthToSpace(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumElements(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, and armnnUtils::Permute().

Referenced by DepthToSpace().
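
As a concrete illustration of the implicit reshape described in the comments above, the sketch below runs DepthToSpace on an NHWC tensor of shape [1, 2, 2, 4] with a block size of 2, producing an output of shape [1, 4, 4, 1]. It assumes the reference workload header declaring armnn::DepthToSpace() is on the include path and that the output buffer is sized by the caller:

    // NHWC input of shape [1, 2, 2, 4], Float32 (dataTypeSize == 4).
    armnn::TensorInfo inputInfo({ 1, 2, 2, 4 }, armnn::DataType::Float32);

    armnn::DepthToSpaceDescriptor descriptor;
    descriptor.m_BlockSize  = 2;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    std::vector<float> input(inputInfo.GetNumElements());  // 16 values
    std::vector<float> output(input.size());                // output shape [1, 4, 4, 1]

    armnn::DepthToSpace(inputInfo, descriptor, input.data(), output.data(), sizeof(float));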

◆ Dequantize() [1/4]

void armnn::Dequantize ( const T *  inputData,
float *  outputData,
const TensorInfo &  info 
)
inline

Definition at line 113 of file RefWorkloadUtils.hpp.

114 {
115  for (unsigned int i = 0; i < info.GetNumElements(); i++)
116  {
117  outputData[i] = Dequantize<T>(inputData[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
118  }
119 }

References info.

◆ Dequantize() [2/4]

std::vector<float> armnn::Dequantize ( const T *  quant,
const TensorInfo &  info 
)

u8 helpers

Definition at line 102 of file RefWorkloadUtils.hpp.

103 {
104  std::vector<float> ret(info.GetNumElements());
105  for (size_t i = 0; i < info.GetNumElements(); i++)
106  {
107  ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
108  }
109  return ret;
110 }

References Dequantize(), and info.

◆ Dequantize() [3/4]

void Dequantize ( Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo 
)

Definition at line 11 of file Dequantize.cpp.

15 {
16  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
17  inputInfo.GetNumElements() == outputInfo.GetNumElements(),
18  "Dequantize: The number of elements in the input and output tensors must be the same.");
19  for (unsigned int i = 0; i < inputInfo.GetNumElements(); i++)
20  {
21  // inputDecoder.Get() dequantizes the data element from whatever
22  // type is given by inputInfo to fp32 (If MakeDecoder supports that dequantization)
23  // outputEncoder.Set() transforms the data element to whatever type is
24  // given by outputInfo (if MakeEncoder supports that transformation)
25  outputEncoder.Set(inputDecoder.Get());
26  ++outputEncoder;
27  ++inputDecoder;
28  }
29 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().

◆ Dequantize() [4/4]

float Dequantize ( QuantizedType  value,
float  scale,
int32_t  offset 
)

Dequantize an 8-bit data type into a floating point data type.

Parameters
value - The value to dequantize.
scale - The scale (must be non-zero).
offset - The offset.
Returns
- The dequantized value calculated as (value-offset)*scale.

Definition at line 48 of file TypesUtils.cpp.

49 {
50  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
51  if (std::isnan(value))
52  {
53  throw armnn::InvalidArgumentException("Dequantize: Value is NaN");
54  }
55  return (armnn::numeric_cast<float>(value - offset)) * scale;
56 }

Referenced by QASymm8Decoder::DecodeTensor(), QASymmS8Decoder::DecodeTensor(), QSymmS8Decoder::DecodeTensor(), QSymm16Decoder::DecodeTensor(), QSymm8PerAxisDecoder::DecodeTensor(), ScaledInt32PerAxisDecoder::DecodeTensor(), SelectiveQuantizer< T, DoQuantize >::Dequantize(), Dequantize(), QASymm8Decoder::Get(), QASymmS8Decoder::Get(), QSymmS8Decoder::Get(), QSymm16Decoder::Get(), QASymm8Encoder::Get(), QASymmS8Encoder::Get(), QSymmS8Encoder::Get(), QSymm16Encoder::Get(), QSymm8PerAxisDecoder::Get(), QSymm8PerAxisEncoder::Get(), ScaledInt32PerAxisDecoder::Get(), and QSymm16PerAxisEncoder::Get().
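
A short worked example of the formula above, using an assumed QAsymmU8 quantization with scale 0.1 and offset 128:

    // (value - offset) * scale  =>  (130 - 128) * 0.1f = 0.2f
    float real = armnn::Dequantize<uint8_t>(static_cast<uint8_t>(130), 0.1f, 128);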

◆ DetectionPostProcess()

void DetectionPostProcess ( const TensorInfo &  boxEncodingsInfo,
const TensorInfo &  scoresInfo,
const TensorInfo &  ,
const TensorInfo &  detectionBoxesInfo,
const TensorInfo &  ,
const TensorInfo &  ,
const TensorInfo &  ,
const DetectionPostProcessDescriptor &  desc,
Decoder< float > &  boxEncodings,
Decoder< float > &  scores,
Decoder< float > &  anchors,
float *  detectionBoxes,
float *  detectionClasses,
float *  detectionScores,
float *  numDetections 
)

Definition at line 139 of file DetectionPostProcess.cpp.

154 {
155 
156  // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
157  // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
158  std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
159 
160  const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
161  const unsigned int numScores = scoresInfo.GetNumElements();
162 
163  for (unsigned int i = 0; i < numBoxes; ++i)
164  {
165  // Y
166  float boxEncodingY = boxEncodings.Get();
167  float anchorY = anchors.Get();
168 
169  ++boxEncodings;
170  ++anchors;
171 
172  // X
173  float boxEncodingX = boxEncodings.Get();
174  float anchorX = anchors.Get();
175 
176  ++boxEncodings;
177  ++anchors;
178 
179  // H
180  float boxEncodingH = boxEncodings.Get();
181  float anchorH = anchors.Get();
182 
183  ++boxEncodings;
184  ++anchors;
185 
186  // W
187  float boxEncodingW = boxEncodings.Get();
188  float anchorW = anchors.Get();
189 
190  ++boxEncodings;
191  ++anchors;
192 
193  float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
194  float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
195 
196  float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
197  float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
198 
199  unsigned int indexY = i * 4;
200  unsigned int indexX = indexY + 1;
201  unsigned int indexH = indexX + 1;
202  unsigned int indexW = indexH + 1;
203 
204  // ymin
205  boxCorners[indexY] = yCentre - halfH;
206  // xmin
207  boxCorners[indexX] = xCentre - halfW;
208  // ymax
209  boxCorners[indexH] = yCentre + halfH;
210  // xmax
211  boxCorners[indexW] = xCentre + halfW;
212  }
213 
214  unsigned int numClassesWithBg = desc.m_NumClasses + 1;
215 
216  // Decode scores
217  std::vector<float> decodedScores;
218  decodedScores.reserve(numScores);
219 
220  for (unsigned int i = 0u; i < numScores; ++i)
221  {
222  decodedScores.emplace_back(scores.Get());
223  ++scores;
224  }
225 
226  // Perform Non Max Suppression.
227  if (desc.m_UseRegularNms)
228  {
229  // Perform Regular NMS.
230  // For each class, perform NMS and select max detection numbers of the highest score across all classes.
231  std::vector<float> classScores(numBoxes);
232 
233  std::vector<unsigned int> selectedBoxesAfterNms;
234  selectedBoxesAfterNms.reserve(numBoxes);
235 
236  std::vector<float> selectedScoresAfterNms;
237  selectedBoxesAfterNms.reserve(numScores);
238 
239  std::vector<unsigned int> selectedClasses;
240 
241  for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
242  {
243  // For each boxes, get scores of the boxes for the class c.
244  for (unsigned int i = 0; i < numBoxes; ++i)
245  {
246  classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
247  }
248  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
249  boxCorners,
250  classScores,
251  desc.m_NmsScoreThreshold,
252  desc.m_DetectionsPerClass,
253  desc.m_NmsIouThreshold);
254 
255  for (unsigned int i = 0; i < selectedIndices.size(); ++i)
256  {
257  selectedBoxesAfterNms.push_back(selectedIndices[i]);
258  selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
259  selectedClasses.push_back(c);
260  }
261  }
262 
263  // Select max detection numbers of the highest score across all classes
264  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
265  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
266 
267  // Sort the max scores among the selected indices.
268  std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
269  TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
270 
271  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
272  selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
273  detectionBoxes, detectionScores, detectionClasses, numDetections);
274  }
275  else
276  {
277  // Perform Fast NMS.
278  // Select max scores of boxes and perform NMS on max scores,
279  // select max detection numbers of the highest score
280  unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
281  std::vector<float> maxScores;
282  std::vector<unsigned int>boxIndices;
283  std::vector<unsigned int>maxScoreClasses;
284 
285  for (unsigned int box = 0; box < numBoxes; ++box)
286  {
287  unsigned int scoreIndex = box * numClassesWithBg + 1;
288 
289  // Get the max scores of the box.
290  std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
291  TopKSort(numClassesPerBox, maxScoreIndices.data(),
292  decodedScores.data() + scoreIndex, desc.m_NumClasses);
293 
294  for (unsigned int i = 0; i < numClassesPerBox; ++i)
295  {
296  maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
297  maxScoreClasses.push_back(maxScoreIndices[i]);
298  boxIndices.push_back(box);
299  }
300  }
301 
302  // Perform NMS on max scores
303  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
304  desc.m_NmsScoreThreshold,
305  desc.m_MaxDetections,
306  desc.m_NmsIouThreshold);
307 
308  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
309  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
310 
311  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
312  boxIndices, maxScoreClasses, maxScores,
313  detectionBoxes, detectionScores, detectionClasses, numDetections);
314  }
315 }

References AllocateOutputData(), GenerateRangeK(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, DetectionPostProcessDescriptor::m_ScaleH, DetectionPostProcessDescriptor::m_ScaleW, DetectionPostProcessDescriptor::m_ScaleX, DetectionPostProcessDescriptor::m_ScaleY, DetectionPostProcessDescriptor::m_UseRegularNms, NonMaxSuppression(), and TopKSort().
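
For readers following the first loop above, the centre-size decoding applied to each box can be written out on its own. The snippet below is an illustrative restatement with placeholder variable names, not a separate ArmNN API:

    // yEnc/xEnc/hEnc/wEnc are one box's encodings; anchorY/X/H/W its anchor.
    float yCentre = yEnc / desc.m_ScaleY * anchorH + anchorY;
    float xCentre = xEnc / desc.m_ScaleX * anchorW + anchorX;
    float halfH   = 0.5f * std::exp(hEnc / desc.m_ScaleH) * anchorH;
    float halfW   = 0.5f * std::exp(wEnc / desc.m_ScaleW) * anchorW;

    // Corner form as stored in boxCorners: (ymin, xmin, ymax, xmax).
    float yMin = yCentre - halfH;
    float xMin = xCentre - halfW;
    float yMax = yCentre + halfH;
    float xMax = xCentre + halfW;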

◆ ExecuteFunction()

void armnn::ExecuteFunction ( std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs,
BinaryOperation  operation,
const std::string &  layerName = "" 
)

Definition at line 27 of file RefElementwiseBinaryWorkload.cpp.

31 {
32  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
33  const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
34  const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
35 
36  const TensorShape& inShape0 = inputInfo0.GetShape();
37  const TensorShape& inShape1 = inputInfo1.GetShape();
38  const TensorShape& outShape = outputInfo.GetShape();
39 
40  std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
41  std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
42  std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
43 
44  using AddFunction = ElementwiseBinaryFunction<std::plus<DataType>>;
45  using DivFunction = ElementwiseBinaryFunction<std::divides<DataType>>;
46  using FloorDivFunction = ElementwiseBinaryFunction<armnn::floorDiv<DataType>>;
47  using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
48  using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
49  using MulFunction = ElementwiseBinaryFunction<std::multiplies<DataType>>;
50  using SubFunction = ElementwiseBinaryFunction<std::minus<DataType>>;
51  using SqDiffFunction = ElementwiseBinaryFunction<armnn::squaredDifference<DataType>>;
52  using PowerFunction = ElementwiseBinaryFunction<armnn::power<DataType>>;
53 
54 
55  switch (operation)
56  {
57  case BinaryOperation::Add:
58  {
59  AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
60  break;
61  }
62  case BinaryOperation::Div:
63  {
64  if(!layerName.empty() && layerName.find("FloorDiv") != std::string::npos)
65  {
66  FloorDivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
67  }
68  else
69  {
70  DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
71  }
72  break;
73  }
74  case BinaryOperation::Maximum:
75  {
76  MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
77  break;
78  }
79  case BinaryOperation::Minimum:
80  {
81  MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
82  break;
83  }
84  case BinaryOperation::Mul:
85  {
86  MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
87  break;
88  }
89  case BinaryOperation::Sub:
90  {
91  SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
92  break;
93  }
94  case BinaryOperation::SqDiff:
95  {
96  SqDiffFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
97  break;
98  }
99  case BinaryOperation::Power:
100  {
101  PowerFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
102  break;
103  }
104  default:
105  {
106  throw InvalidArgumentException(std::string("Unsupported binary operation ") +
107  GetBinaryOperationAsCString(operation), CHECK_LOCATION());
108  }
109  }
110 }

References Add, CHECK_LOCATION, Div, GetBinaryOperationAsCString(), TensorInfo::GetShape(), GetTensorInfo(), Map, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

◆ ExtractJsonObjects()

void armnn::ExtractJsonObjects ( unsigned int  inferenceIndex,
const Event *  parentEvent,
JsonChildObject &  parentObject,
std::map< const Event *, std::vector< const Event * >>  descendantsMap 
)

Definition at line 314 of file Profiling.cpp.

318 {
319  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(parentEvent, "parentEvent must not be null.");
320 
321  // If profiling GUID is entered, process it
322  if (parentEvent->GetProfilingGuid().has_value())
323  {
324  arm::pipe::ProfilingGuid profilingGuid;
325  profilingGuid = parentEvent->GetProfilingGuid().value();
326  parentObject.SetGuid(profilingGuid);
327  }
328  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
329  unsigned int childIdx = 0;
330  unsigned int numSkippedKernels = 0;
331  if (inferenceIndex > 0)
332  {
333  for (auto &i: parentEvent->GetInstruments())
334  {
335  if (i->HasKernelMeasurements())
336  {
337  numSkippedKernels = static_cast<unsigned int>(parentObject.m_Children.size() -
338  instrumentMeasurements.size());
339  childIdx = numSkippedKernels;
340  }
341  }
342  }
343 
344  for (size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
345  {
346  if (inferenceIndex == 0)
347  {
348  // Only add kernel measurement once, in case of multiple inferences
349  JsonChildObject measurementObject{ instrumentMeasurements[measurementIndex].m_Name };
350  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
351  measurementObject.SetType(JsonObjectType::Measurement);
352 
353  if (parentObject.NumChildren() != childIdx)
354  {
355  throw armnn::Exception("parentObject must have the same number of children as childIdx.");
356  }
357  parentObject.AddChild(measurementObject);
358  }
359  else
360  {
361  if (numSkippedKernels > 0)
362  {
363  parentObject.GetChild(--numSkippedKernels).AddMeasurement(0.0);
364  }
365  }
366 
367  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
368  }
369 
370  auto childEventsIt = descendantsMap.find(parentEvent);
371  if (childEventsIt != descendantsMap.end())
372  {
373  for (auto childEvent : childEventsIt->second)
374  {
375  if (inferenceIndex == 0)
376  {
377  // Only add second level once, in case of multiple inferences
378  JsonChildObject childObject{ childEvent->GetName() };
379  childObject.SetType(JsonObjectType::Event);
380  parentObject.AddChild(childObject);
381  }
382 
383  // It's possible that childIdx can overrun the parents' child vector. Check before we try to process a
384  // non-existent child.
385  if (childIdx < parentObject.NumChildren())
386  {
387  // Recursively process children.
388  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
389  childIdx++;
390  }
391  }
392  }
393 }

References JsonChildObject::AddChild(), JsonChildObject::AddMeasurement(), ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Event, JsonChildObject::GetChild(), Event::GetInstruments(), Event::GetMeasurements(), Event::GetProfilingGuid(), OptionalBase::has_value(), JsonChildObject::m_Children, Measurement, JsonChildObject::NumChildren(), JsonChildObject::SetGuid(), JsonChildObject::SetType(), JsonChildObject::SetUnit(), and OptionalReferenceSwitch< IsReference, T >::value().

Referenced by ProfilerImpl::Print().

◆ FakeQuantization()

void armnn::FakeQuantization ( const float *  inputData,
float *  outputData,
uint32_t  numElements,
float  min,
float  max 
)

Definition at line 17 of file RefFakeQuantizationFloat32Workload.cpp.

18 {
19  float scale = (max - min) / 255.f;
20  int32_t offset = armnn::numeric_cast<int32_t>((-min * 255.f) / (max - min));
21 
22  for (uint32_t i = 0; i < numElements; i++)
23  {
24  outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
25  }
26 
27 }
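
A small usage sketch: with min = -1.0f and max = 1.0f the code above derives scale = (max - min) / 255 ≈ 0.00784 and an offset of roughly 127 (the exact value depends on how numeric_cast resolves 127.5). Note that the output holds the 8-bit quantized codes cast back to float, not the de-quantized real values:

    std::vector<float> input  = { -1.0f, -0.5f, 0.0f, 0.5f, 1.0f };
    std::vector<float> output(input.size());

    armnn::FakeQuantization(input.data(), output.data(),
                            static_cast<uint32_t>(input.size()),
                            /*min=*/-1.0f, /*max=*/1.0f);
    // output now holds the quantized codes as floats, approximately
    // { 0, 63, 127, 191, 255 }.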

◆ FalseFunc()

bool armnn::FalseFunc ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 62 of file LayerSupportCommon.hpp.

63 {
64  IgnoreUnused(reasonIfUnsupported);
65  IgnoreUnused(params...);
66  return false;
67 }

References IgnoreUnused().

◆ FalseFuncF16()

bool armnn::FalseFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 70 of file LayerSupportCommon.hpp.

71 {
72  IgnoreUnused(params...);
73  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type");
74  return false;
75 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncF32()

bool armnn::FalseFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 78 of file LayerSupportCommon.hpp.

79 {
80  IgnoreUnused(params...);
81  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type");
82  return false;
83 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncI32()

bool armnn::FalseFuncI32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 94 of file LayerSupportCommon.hpp.

95 {
96  IgnoreUnused(params...);
97  SetValueChecked(reasonIfUnsupported, "Layer is not supported with int32 data type");
98  return false;
99 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncU8()

bool armnn::FalseFuncU8 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 86 of file LayerSupportCommon.hpp.

87 {
88  IgnoreUnused(params...);
89  SetValueChecked(reasonIfUnsupported, "Layer is not supported with 8-bit data type");
90  return false;
91 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF16()

bool armnn::FalseInputFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 110 of file LayerSupportCommon.hpp.

111 {
112  IgnoreUnused(params...);
113  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type input");
114  return false;
115 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF32()

bool armnn::FalseInputFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 102 of file LayerSupportCommon.hpp.

103 {
104  IgnoreUnused(params...);
105  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type input");
106  return false;
107 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF16()

bool armnn::FalseOutputFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 126 of file LayerSupportCommon.hpp.

127 {
128  IgnoreUnused(params...);
129  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type output");
130  return false;
131 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF32()

bool armnn::FalseOutputFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 118 of file LayerSupportCommon.hpp.

119 {
120  IgnoreUnused(params...);
121  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type output");
122  return false;
123 }

References IgnoreUnused(), and SetValueChecked().

◆ Fill()

void Fill ( Encoder< float > &  output,
const TensorShape &  desiredOutputShape,
const float  value 
)

Creates a tensor and fills it with a scalar value.

Definition at line 13 of file Fill.cpp.

16 {
17  for(unsigned int i = 0; i < desiredOutputShape.GetNumElements(); ++i)
18  {
19  output[i];
20  output.Set(value);
21  }
22 }

References TensorShape::GetNumElements(), and Encoder< IType >::Set().

◆ FindKernelMeasurements()

std::vector<Measurement> armnn::FindKernelMeasurements ( const Event *  event)

Definition at line 62 of file Profiling.cpp.

63 {
64  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(event, "event should not be null.");
65 
66  std::vector<Measurement> measurements;
67 
68  // Search through the measurements.
69  for (const auto& measurement : event->GetMeasurements())
70  {
71  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
72  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
73  {
74  // Measurement found.
75  measurements.push_back(measurement);
76  }
77  }
78 
79  return measurements;
80 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, and Event::GetMeasurements().

◆ FindMeasurement()

Measurement armnn::FindMeasurement ( const std::string &  name,
const Event *  event 
)

Definition at line 43 of file Profiling.cpp.

44 {
45 
46  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(event, "event should not be null.");
47 
48  // Search though the measurements.
49  for (const auto& measurement : event->GetMeasurements())
50  {
51  if (measurement.m_Name == name)
52  {
53  // Measurement found.
54  return measurement;
55  }
56  }
57 
58  // Measurement not found.
59  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
60 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, and Event::GetMeasurements().

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults(), and ProfilerImpl::CalculateProfilingEventStats().

◆ FinishClContextBuffer()

void armnn::FinishClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 171 of file ClContextSchema_generated.h.

173  {
174  fbb.Finish(root, ClContextIdentifier());
175 }

References ClContextIdentifier().

◆ FinishSizePrefixedClContextBuffer()

void armnn::FinishSizePrefixedClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 177 of file ClContextSchema_generated.h.

179  {
180  fbb.FinishSizePrefixed(root, ClContextIdentifier());
181 }

References ClContextIdentifier().

◆ FoldPadIntoAveragePool2d()

LayerType* armnn::FoldPadIntoAveragePool2d ( OptimizationViews &  optimizationViews,
Pooling2dLayer *  baseLayer,
Pooling2dDescriptor &  poolDescriptor,
PadLayer *  padLayer 
)

Definition at line 341 of file SubgraphUtils.hpp.

345 {
346  IConnectableLayer* replacement =
347  optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d");
348  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
349 
350  FoldPadLayer(optimizationViews,
351  baseLayer,
352  replacementLayer,
353  padLayer);
354 
355  return replacementLayer;
356 }

References INetwork::AddPooling2dLayer(), FoldPadLayer(), and OptimizationViews::GetINetwork().

◆ FoldPadLayer()

LayerType* armnn::FoldPadLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
LayerType *  replacementLayer,
PadLayer *  padLayer 
)

Definition at line 234 of file SubgraphUtils.hpp.

238 {
239  SubgraphView substitutionSubgraph({padLayer, baseLayer},
240  CreateIInputsFrom({padLayer}),
241  CreateIOutputsFrom({baseLayer}));
242  SubgraphView replacementSubgraph(replacementLayer);
243 
244  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
245 
246  return replacementLayer;
247 }

References OptimizationViews::AddSubstitution().

Referenced by FoldPadIntoAveragePool2d().

◆ ForEachLayerInput()

void armnn::ForEachLayerInput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 277 of file SubgraphViewSelector.cpp.

280 {
281  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
282 
283  for (auto inputSlot : layer.GetInputSlots())
284  {
285  auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
286  if (!connectedInput)
287  {
288  throw armnn::Exception("Dangling input slot detected.");
289  }
290  Layer& inputLayer = connectedInput->GetOwningLayer();
291 
292  auto parentInfo = layerInfos.find(&inputLayer);
293  if (parentInfo != layerInfos.end())
294  {
295  function(parentInfo->second);
296  }
297  }
298 }

References Layer::GetInputSlots().

Referenced by AssignSplitId(), and IsReadyForSplitAssignment().

◆ ForEachLayerOutput()

void armnn::ForEachLayerOutput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 301 of file SubgraphViewSelector.cpp.

304 {
305  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
306 
307  for (auto& outputSlot : layer.GetOutputSlots())
308  {
309  for (auto& output : outputSlot.GetConnections())
310  {
311  Layer& childLayer = output->GetOwningLayer();
312 
313  auto childInfo = layerInfos.find(&childLayer);
314  if (childInfo != layerInfos.end())
315  {
316  function(childInfo->second);
317  }
318  }
319  }
320 }

References Layer::GetOutputSlots().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ FullyConnected()

void FullyConnected ( const TensorShape &  rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape &  rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape &  rWeightsShape,
Decoder< float > &  rWeightDecoder,
Decoder< float > *  pBiasDecoder,
const bool  biasEnabled,
const unsigned int  K,
const bool  transposeWeights 
)

Performs a matrix multiplication and optionally adds a bias.

Definition at line 13 of file FullyConnected.cpp.

23 {
24  // Perform FullyConnected implementation
25  unsigned int outputSize = rOutputShape[1];
26 
27  const std::vector<float> decodedInputs = rInputDecoder.DecodeTensor(rInputShape);
28  const std::vector<float> decodedWeights = rWeightDecoder.DecodeTensor(rWeightsShape);
29 
30  const TensorShape biasShape{outputSize};
31 
32  const std::vector<float> decodedBiases = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
33 
34 
35  for (unsigned int n = 0; n < rInputShape[0]; n++)
36  {
37  for (unsigned int channelOutput = 0; channelOutput < outputSize; channelOutput++)
38  {
39  float outval = 0.f;
40 
41  for (unsigned int channelInput = 0; channelInput < K; channelInput++)
42  {
43  float weight;
44  if (transposeWeights)
45  {
46  weight = decodedWeights[channelOutput * K + channelInput];
47  }
48  else
49  {
50  weight = decodedWeights[channelInput * outputSize + channelOutput];
51  }
52 
53  outval += weight * decodedInputs[n * K + channelInput];
54  }
55 
56  if (biasEnabled)
57  {
58  outval += decodedBiases[channelOutput];
59  }
60 
61  rOutputEncoder[n * outputSize + channelOutput];
62  rOutputEncoder.Set(outval);
63  }
64  }
65 }

References Decoder< IType >::DecodeTensor(), and Encoder< IType >::Set().
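
The indexing in the inner loop can be easier to follow as a standalone scalar helper. The function below is a hypothetical illustration of computing one output element for the non-transposed weight layout [K, outputSize]; it is not part of ArmNN:

    // in:      shape [N, K], row-major
    // weights: shape [K, outputSize]
    // bias:    shape [outputSize]
    float FullyConnectedElement(const std::vector<float>& in,
                                const std::vector<float>& weights,
                                const std::vector<float>& bias,
                                unsigned int n, unsigned int o,
                                unsigned int K, unsigned int outputSize)
    {
        float acc = 0.0f;
        for (unsigned int k = 0; k < K; ++k)
        {
            acc += weights[k * outputSize + o] * in[n * K + k];
        }
        return acc + bias[o];
    }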

◆ FuseAdditionLayer()

LayerType* armnn::FuseAdditionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 73 of file ArmComputeSubgraphUtils.hpp.

78 {
79  ARMNN_NO_DEPRECATE_WARN_BEGIN
80  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddAdditionLayer(name.c_str());
81  ARMNN_NO_DEPRECATE_WARN_END
82  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
83 
84  FuseLayer(optimizationViews,
85  baseLayer,
86  replacementLayer,
87  activationLayer,
88  activationDesc);
89 
90  return replacementLayer;
91 }

References INetwork::AddAdditionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseBatchNormalizationLayer()

LayerType* armnn::FuseBatchNormalizationLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 178 of file ArmComputeSubgraphUtils.hpp.

183 {
184  IConnectableLayer* replacement =
185  optimizationViews.GetINetwork()->AddBatchNormalizationLayer(baseLayer->GetParameters(),
186  ConstTensor(),
187  ConstTensor(),
188  ConstTensor(),
189  ConstTensor(),
190  name.c_str());
191  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
192 
193  FuseLayer(optimizationViews,
194  baseLayer,
195  replacementLayer,
196  activationLayer,
197  activationDesc);
198 
199  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
200  CreateIInputsFrom({baseLayer}),
201  CreateIOutputsFrom({activationLayer}));
202  SubgraphView replacementSubgraph(replacementLayer);
203 
204  return replacementLayer;
205 }

References INetwork::AddBatchNormalizationLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseConvolution2dLayer()

LayerType* armnn::FuseConvolution2dLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 208 of file ArmComputeSubgraphUtils.hpp.

213 {
214  IConnectableLayer* replacement = optimizationViews.GetINetwork()
215  ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
216 
217  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
218 
219 
220  FuseLayer(optimizationViews,
221  baseLayer,
222  replacementLayer,
223  activationLayer,
224  activationDesc);
225 
226  return replacementLayer;
227 }

References INetwork::AddConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDepthwiseConvolution2dLayer()

LayerType* armnn::FuseDepthwiseConvolution2dLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 230 of file ArmComputeSubgraphUtils.hpp.

235 {
236  IConnectableLayer* replacement =
237  optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
238 
239  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
240 
241 
242  FuseLayer(optimizationViews,
243  baseLayer,
244  replacementLayer,
245  activationLayer,
246  activationDesc);
247 
248  return replacementLayer;
249 }

References INetwork::AddDepthwiseConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDivisionLayer()

LayerType* armnn::FuseDivisionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 115 of file ArmComputeSubgraphUtils.hpp.

120 {
121  ARMNN_NO_DEPRECATE_WARN_BEGIN
122  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddDivisionLayer(name.c_str());
123  ARMNN_NO_DEPRECATE_WARN_END
124  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
125 
126  FuseLayer(optimizationViews,
127  baseLayer,
128  replacementLayer,
129  activationLayer,
130  activationDesc);
131 
132  return replacementLayer;
133 }

References INetwork::AddDivisionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseElementwiseBinaryLayer()

LayerType* armnn::FuseElementwiseBinaryLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
BinaryOperation  operation,
std::string  name 
)

Definition at line 157 of file ArmComputeSubgraphUtils.hpp.

163 {
164  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddElementwiseBinaryLayer(operation,
165  name.c_str());
166  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
167 
168  FuseLayer(optimizationViews,
169  baseLayer,
170  replacementLayer,
171  activationLayer,
172  activationDesc);
173 
174  return replacementLayer;
175 }

References INetwork::AddElementwiseBinaryLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseFullyConnectedLayer()

LayerType* armnn::FuseFullyConnectedLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 252 of file ArmComputeSubgraphUtils.hpp.

257 {
258  IConnectableLayer* replacement =
259  optimizationViews.GetINetwork()->AddFullyConnectedLayer(baseLayer->GetParameters(),
260  name.c_str());
261  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
262 
263  FuseLayer(optimizationViews,
264  baseLayer,
265  replacementLayer,
266  activationLayer,
267  activationDesc);
268 
269 
270  return replacementLayer;
271 }

References INetwork::AddFullyConnectedLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseLayer()

LayerType* armnn::FuseLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
LayerType *  replacementLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc 
)

Definition at line 53 of file ArmComputeSubgraphUtils.hpp.

58 {
59  replacementLayer->SetAdditionalInfoForObject(
60  std::make_shared<ActivationDescriptor>(activationDesc));
61 
62  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
63  CreateIInputsFrom({baseLayer}),
64  CreateIOutputsFrom({activationLayer}));
65  SubgraphView replacementSubgraph(replacementLayer);
66 
67  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
68 
69  return replacementLayer;
70 }

References OptimizationViews::AddSubstitution().

Referenced by FuseAdditionLayer(), FuseBatchNormalizationLayer(), FuseConvolution2dLayer(), FuseDepthwiseConvolution2dLayer(), FuseDivisionLayer(), FuseElementwiseBinaryLayer(), FuseFullyConnectedLayer(), FuseMultiplicationLayer(), and FuseSubtractionLayer().
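
All of the Fuse*Layer helpers in this group follow the same pattern: build a replacement layer, attach the ActivationDescriptor as additional info via FuseLayer(), and register the {baseLayer, activationLayer} -> replacementLayer substitution. The sketch below shows how a backend's OptimizeSubgraphView() might invoke one of them once it has matched a Convolution2d followed by an Activation; the variable names and the surrounding pattern matching are assumptions, not ArmNN code:

    // baseLayer:       Convolution2dLayer* found in the subgraph (hypothetical)
    // activationLayer: ActivationLayer* consuming its output (hypothetical)
    armnn::ActivationDescriptor activationDesc = activationLayer->GetParameters();

    armnn::Convolution2dLayer* fused =
        armnn::FuseConvolution2dLayer(optimizationViews,
                                      baseLayer,
                                      activationLayer,
                                      activationDesc,
                                      baseLayer->GetNameStr() + "_fused_activation");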

◆ FuseMultiplicationLayer()

LayerType* armnn::FuseMultiplicationLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 136 of file ArmComputeSubgraphUtils.hpp.

141 {
142  ARMNN_NO_DEPRECATE_WARN_BEGIN
143  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddMultiplicationLayer(name.c_str());
144  ARMNN_NO_DEPRECATE_WARN_END
145  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
146 
147  FuseLayer(optimizationViews,
148  baseLayer,
149  replacementLayer,
150  activationLayer,
151  activationDesc);
152 
153  return replacementLayer;
154 }

References INetwork::AddMultiplicationLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseSubtractionLayer()

LayerType* armnn::FuseSubtractionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 94 of file ArmComputeSubgraphUtils.hpp.

99 {
100  ARMNN_NO_DEPRECATE_WARN_BEGIN
101  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddSubtractionLayer(name.c_str());
102  ARMNN_NO_DEPRECATE_WARN_END
103  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
104 
105  FuseLayer(optimizationViews,
106  baseLayer,
107  replacementLayer,
108  activationLayer,
109  activationDesc);
110 
111  return replacementLayer;
112 }

References INetwork::AddSubtractionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ Gather()

void Gather ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  params,
const int32_t *  indices,
Encoder< float > &  output,
const int32_t  axis_int 
)

Definition at line 15 of file Gather.cpp.

22 {
23  IgnoreUnused(outputInfo);
24 
25  const int paramsRank = static_cast<int>(paramsInfo.GetNumDimensions());
26  if((axis_int < -1 * paramsRank) || (paramsRank <= axis_int))
27  {
28  throw InvalidArgumentException((fmt::format("Gather: Axis {} is not within [-{}, {}) range",
29  axis_int, paramsRank, paramsRank)));
30  }
31  const unsigned int axis = (axis_int < 0) ? static_cast<unsigned int>(paramsRank + axis_int)
32  : static_cast<unsigned int>(axis_int);
33 
34  const TensorShape& paramsShape = paramsInfo.GetShape();
35 
36  // Product of all dimensions to the left side of the axis
37  unsigned int paramsOuterProduct = 1;
38  for (unsigned int i = 0; i < axis; ++i)
39  {
40  paramsOuterProduct *= paramsShape[i];
41  }
42  // Product of all dimensions to the right side of the axis
43  unsigned int paramsInnerProduct = 1;
44  for (unsigned int k = 1 + axis; k < paramsInfo.GetNumDimensions(); ++k)
45  {
46  paramsInnerProduct *= paramsShape[k];
47  }
48 
49  unsigned int offset = 0;
50  unsigned int outIndex = 0;
51  for (unsigned int i = 0; i < paramsOuterProduct; ++i)
52  {
53  for (unsigned int j = 0; j < indicesInfo.GetNumElements(); ++j)
54  {
55  unsigned int index =
56  (indices[j] < 0) ? static_cast<unsigned int>(static_cast<int>(paramsShape[axis]) + indices[j])
57  : static_cast<unsigned int>(indices[j]);
58 
59  if (index >= paramsShape[axis])
60  {
61  throw InvalidArgumentException((fmt::format("Gather: index >= paramsShape[axis]: {} >= {}",
62  index, paramsShape[axis] )));
63  }
64 
65  unsigned int startOffset = (paramsInnerProduct * index) + offset;
66  unsigned int endOffset = startOffset + paramsInnerProduct;
67 
68  for (unsigned int k = startOffset; k < endOffset; ++k)
69  {
70  params[k];
71  float outputValue = params.Get();
72  output[outIndex];
73  output.Set(outputValue);
74  ++outIndex;
75  }
76  }
77  offset += paramsShape[axis] * paramsInnerProduct;
78  }
79 
80  if (outIndex != outputInfo.GetNumElements())
81  {
82  throw InvalidArgumentException((fmt::format("Gather: Invalid outIndex {} ", outIndex)));
83  }
84 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), and Encoder< IType >::Set().
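
A worked example of what the loops above compute, written with plain std::vector containers rather than the Decoder/Encoder pair (purely illustrative): gathering along axis 0 of a [3, 4] params tensor with indices {2, 0} yields a [2, 4] output made of rows 2 and 0.

    std::vector<float>   params  = { 0, 1,  2,  3,     // row 0
                                     4, 5,  6,  7,     // row 1
                                     8, 9, 10, 11 };   // row 2  -> shape [3, 4]
    std::vector<int32_t> indices = { 2, 0 };           // shape [2]

    std::vector<float> output;                         // shape [2, 4]
    for (int32_t index : indices)
    {
        output.insert(output.end(),
                      params.begin() + index * 4,
                      params.begin() + (index + 1) * 4);
    }
    // output == { 8, 9, 10, 11, 0, 1, 2, 3 }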

◆ GatherTensorHandlePairs()

void armnn::GatherTensorHandlePairs ( const DescriptorType &  descriptor,
std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType * >> &  tensorHandlePairs 
)

Definition at line 204 of file WorkloadUtils.hpp.

206 {
207  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
208  tensorHandlePairs.reserve(numInputs);
209 
210  for (unsigned int i = 0; i < numInputs; ++i)
211  {
212  SrcTensorHandleType* const srcTensorHandle =
213  PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
214  DstTensorHandleType* const dstTensorHandle =
215  PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
216 
217  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
218  }
219 }

Referenced by CopyMemGenericWorkload::CopyMemGenericWorkload(), CopyMemGenericWorkload::ExecuteAsync(), NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(), and NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload().

◆ GeneratePermutationVectorOnLastTwoDimensions()

armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions ( unsigned int  rank)

Generates a permutation vector of size rank that permutes the two rightmost dimensions.

Parameters
rank - Tensor rank, i.e. the number of dimensions in the tensors
Returns
- A permutation vector that permutes the last two dimensions

Definition at line 357 of file WorkloadUtils.cpp.

358 {
359  armnn::PermutationVector permutationVector{};
360  switch (rank)
361  {
362  case 2:
363  permutationVector = {1U, 0U};
364  break;
365  case 3:
366  permutationVector = {0U, 2U, 1U};
367  break;
368  case 4:
369  permutationVector = {0U, 1U, 3U, 2U};
370  break;
371  default:
372  throw Exception("Invalid number of dimensions.");
373  }
374  return permutationVector;
375 }
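
For example, for a rank-4 tensor the helper returns {0, 1, 3, 2}, which swaps the last two dimensions and leaves the leading ones in place:

    armnn::PermutationVector perm = armnn::GeneratePermutationVectorOnLastTwoDimensions(4);
    // perm == {0, 1, 3, 2}; permuting a tensor of shape [2, 3, 4, 5] with it
    // produces a tensor of shape [2, 3, 5, 4].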

◆ GenerateRangeK()

std::vector<unsigned int> armnn::GenerateRangeK ( unsigned int  k)

Definition at line 16 of file DetectionPostProcess.cpp.

17 {
18  std::vector<unsigned int> range(k);
19  std::iota(range.begin(), range.end(), 0);
20  return range;
21 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().

◆ GetActivationFunctionAsCString()

constexpr char const* armnn::GetActivationFunctionAsCString ( ActivationFunction  activation)
constexpr

Definition at line 31 of file TypesUtils.hpp.

32 {
33  switch (activation)
34  {
35  case ActivationFunction::Sigmoid: return "Sigmoid";
36  case ActivationFunction::TanH: return "TanH";
37  case ActivationFunction::Linear: return "Linear";
38  case ActivationFunction::ReLu: return "ReLu";
39  case ActivationFunction::BoundedReLu: return "BoundedReLu";
40  case ActivationFunction::SoftReLu: return "SoftReLu";
41  case ActivationFunction::LeakyReLu: return "LeakyReLu";
42  case ActivationFunction::Abs: return "Abs";
43  case ActivationFunction::Sqrt: return "Sqrt";
44  case ActivationFunction::Square: return "Square";
45  case ActivationFunction::Elu: return "Elu";
46  case ActivationFunction::HardSwish: return "HardSwish";
47  case ActivationFunction::Gelu: return "Gelu";
48  default: return "Unknown";
49  }
50 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by StringifyLayerParameters< ActivationDescriptor >::Serialize().

◆ GetArgMinMaxFunctionAsCString()

constexpr char const* armnn::GetArgMinMaxFunctionAsCString ( ArgMinMaxFunction  function)
constexpr

Definition at line 52 of file TypesUtils.hpp.

53 {
54  switch (function)
55  {
56  case ArgMinMaxFunction::Max: return "Max";
57  case ArgMinMaxFunction::Min: return "Min";
58  default: return "Unknown";
59  }
60 }

References Max, and Min.

◆ GetBiasDataType()

DataType GetBiasDataType ( DataType  inputDataType)

Definition at line 28 of file WorkloadData.cpp.

29 {
30  switch (inputDataType)
31  {
32  case DataType::Float16:
33  return DataType::Float16;
34  case DataType::BFloat16:
35  case DataType::Float32:
36  return DataType::Float32;
37  case DataType::QAsymmS8:
38  case DataType::QAsymmU8:
39  case DataType::QSymmS8:
40  case DataType::QSymmS16:
41  return DataType::Signed32;
42  default:
43  throw InvalidArgumentException("GetBiasDataType(): Unsupported data type.");
44  }
45 }

References BFloat16.

Referenced by FullyConnectedQueueDescriptor::Validate(), Convolution2dQueueDescriptor::Validate(), Convolution3dQueueDescriptor::Validate(), DepthwiseConvolution2dQueueDescriptor::Validate(), and TransposeConvolution2dQueueDescriptor::Validate().

◆ GetBiasTypeFromWeightsType()

armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType ( armnn::Optional< armnn::DataType >  weightsType)
inline

Definition at line 13 of file LayerSupportRules.hpp.

14 {
15  if (!weightsType)
16  {
17  return weightsType;
18  }
19 
20  switch(weightsType.value())
21  {
22  case armnn::DataType::Float16:
23  case armnn::DataType::Float32:
24  return weightsType;
25  case armnn::DataType::QAsymmS8:
26  case armnn::DataType::QAsymmU8:
27  case armnn::DataType::QSymmS8:
28  case armnn::DataType::QSymmS16:
29  return armnn::DataType::Signed32;
30  default:
31  throw InvalidArgumentException("GetBiasTypeFromWeightsType(): Unsupported data type.");
32  }
33  return armnn::EmptyOptional();
34 }

References Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by BiasAndWeightsTypesMatch::BiasAndWeightsTypesMatch().

◆ GetBinaryOperationAsCString()

constexpr char const* armnn::GetBinaryOperationAsCString ( BinaryOperation  operation)
constexpr

Definition at line 76 of file TypesUtils.hpp.

77 {
78  switch (operation)
79  {
80  case BinaryOperation::Add: return "Add";
81  case BinaryOperation::Div: return "Div";
82  case BinaryOperation::Maximum: return "Maximum";
83  case BinaryOperation::Minimum: return "Minimum";
84  case BinaryOperation::Mul: return "Mul";
85  case BinaryOperation::Power: return "Power";
86  case BinaryOperation::SqDiff: return "SqDiff";
87  case BinaryOperation::Sub: return "Sub";
88  default: return "Unknown";
89  }
90 }

References Add, Div, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

Referenced by ExecuteFunction(), and StringifyLayerParameters< ElementwiseBinaryDescriptor >::Serialize().

◆ GetCapability() [1/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const armnn::BackendId backend 
)

Returns a BackendCapability if the backend lists the capability. The returned BackendCapability must then be inspected to check whether it is supported; otherwise, if the capability is unlisted, an EmptyOptional is returned.

Definition at line 51 of file BackendHelper.cpp.

53 {
54  auto const& backendRegistry = armnn::BackendRegistryInstance();
55  if (backendRegistry.IsBackendRegistered(backend))
56  {
57  auto factoryFunc = backendRegistry.GetFactory(backend);
58  auto backendObject = factoryFunc();
59  auto capabilities = backendObject->GetCapabilities();
60  return GetCapability(backendCapabilityName, capabilities);
61  }
62  return EmptyOptional();
63 }

References BackendRegistryInstance(), and GetCapability().

◆ GetCapability() [2/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const BackendCapabilities &  capabilities 
)

Returns a BackendCapability if the backend lists the capability. The returned BackendCapability must then be inspected to check whether it is supported; otherwise, if the capability is unlisted, an EmptyOptional is returned.

Definition at line 37 of file BackendHelper.cpp.

39 {
40  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
41  {
42  const auto& capability = capabilities.GetOption(i);
43  if (backendCapabilityName == capability.GetName())
44  {
45  return capability;
46  }
47  }
48  return EmptyOptional();
49 }

References BackendOptions::GetOption(), and BackendOptions::GetOptionCount().

Referenced by GetCapability(), HasCapability(), LayerSupportHandle::IsConvolution2dSupported(), LayerSupportHandle::IsDepthwiseConvolutionSupported(), LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(), and LayerSupportHandle::IsFullyConnectedSupported().
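
A usage sketch: query a named capability from a registered backend and inspect it. The capability name "NonConstWeights" and the CpuAcc backend are used here only as examples:

    auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId("CpuAcc"));
    if (capability.has_value() && capability.value().GetValue().IsBool())
    {
        bool supported = capability.value().GetValue().AsBool();
        // 'supported' reflects whether the backend lists NonConstWeights as true.
    }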

◆ GetClContext()

const armnn::ClContext* armnn::GetClContext ( const void *  buf)
inline

Definition at line 140 of file ClContextSchema_generated.h.

140  {
141  return flatbuffers::GetRoot<armnn::ClContext>(buf);
142 }

Referenced by ClContextDeserializer::DeserializeFromBinary().

◆ GetComparisonOperationAsCString()

constexpr char const* armnn::GetComparisonOperationAsCString ( ComparisonOperation  operation)
constexpr

Definition at line 62 of file TypesUtils.hpp.

63 {
64  switch (operation)
65  {
66  case ComparisonOperation::Equal: return "Equal";
67  case ComparisonOperation::Greater: return "Greater";
68  case ComparisonOperation::GreaterOrEqual: return "GreaterOrEqual";
69  case ComparisonOperation::Less: return "Less";
70  case ComparisonOperation::LessOrEqual: return "LessOrEqual";
71  case ComparisonOperation::NotEqual: return "NotEqual";
72  default: return "Unknown";
73  }
74 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, and NotEqual.

Referenced by StringifyLayerParameters< ComparisonDescriptor >::Serialize().

◆ GetComputeDeviceAsCString()

constexpr char const* armnn::GetComputeDeviceAsCString ( Compute  compute)
constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 34 of file BackendId.hpp.

35 {
36  switch (compute)
37  {
38  case armnn::Compute::CpuRef: return "CpuRef";
39  case armnn::Compute::CpuAcc: return "CpuAcc";
40  case armnn::Compute::GpuAcc: return "GpuAcc";
41  default: return "Unknown";
42  }
43 }

References CpuAcc, CpuRef, and GpuAcc.

Referenced by BackendId::BackendId(), BackendId::IsCpuAcc(), BackendId::IsCpuRef(), BackendId::IsGpuAcc(), BackendId::IsUndefined(), and operator<<().

◆ GetConvolutionMethodString()

std::string GetConvolutionMethodString ( arm_compute::ConvolutionMethod &  convolutionMethod)
inline

Definition at line 46 of file ClWorkloadUtils.hpp.

47 {
48  switch (convolutionMethod)
49  {
50  case arm_compute::ConvolutionMethod::FFT:
51  return "FFT";
52  case arm_compute::ConvolutionMethod::DIRECT:
53  return "Direct";
54  case arm_compute::ConvolutionMethod::GEMM:
55  return "GEMM";
56  case arm_compute::ConvolutionMethod::WINOGRAD:
57  return "Winograd";
58  default:
59  return "Unknown";
60  }
61 }

◆ GetDataLayoutName()

◆ GetDataTypeName()

constexpr const char* armnn::GetDataTypeName ( DataType  dataType)
constexpr

Definition at line 233 of file TypesUtils.hpp.

234 {
235  switch (dataType)
236  {
237  case DataType::Float16: return "Float16";
238  case DataType::Float32: return "Float32";
239  case DataType::Signed64: return "Signed64";
240  case DataType::QAsymmU8: return "QAsymmU8";
241  case DataType::QAsymmS8: return "QAsymmS8";
242  case DataType::QSymmS8: return "QSymmS8";
243  case DataType::QSymmS16: return "QSymm16";
244  case DataType::Signed32: return "Signed32";
245  case DataType::Boolean: return "Boolean";
246  case DataType::BFloat16: return "BFloat16";
247 
248  default:
249  return "Unknown";
250  }
251 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by AttemptBackendAssignment(), RefDebugWorkload< DataType >::GetName(), armnnUtils::GetPerAxisParams(), ConstantLayer::SerializeLayerParameters(), armnnUtils::ToFloatArray(), and VerifyTensorInfoDataType().

◆ GetDataTypeSize()

constexpr unsigned int armnn::GetDataTypeSize ( DataType  dataType)
constexpr

Definition at line 182 of file TypesUtils.hpp.

183 {
184  switch (dataType)
185  {
186  case DataType::BFloat16:
187  case DataType::Float16: return 2U;
188  case DataType::Float32:
189  case DataType::Signed32: return 4U;
190  case DataType::Signed64: return 8U;
191  case DataType::QAsymmU8: return 1U;
192  case DataType::QAsymmS8: return 1U;
193  case DataType::QSymmS8: return 1U;
194  case DataType::QSymmS16: return 2U;
195  case DataType::Boolean: return 1U;
196  default: return 0U;
197  }
198 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by TosaRefTensorHandle::CanBeImported(), RefTensorHandle::CanBeImported(), TensorInfo::GetNumBytes(), GetUnpaddedTensorStrides(), PermuteTensor(), and armnn_driver::SwizzleAndroidNn4dTensorToArmNn().
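
For instance, the per-element size can be combined with TensorInfo::GetNumElements() to compute a buffer size (TensorInfo::GetNumBytes() does the same internally):

    armnn::TensorInfo info({ 1, 3, 224, 224 }, armnn::DataType::Float16);
    unsigned int bytes = info.GetNumElements() * armnn::GetDataTypeSize(info.GetDataType());
    // 150528 elements * 2 bytes == 301056 bytes, matching info.GetNumBytes().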

◆ GetEventPtr() [1/2]

const Event* armnn::GetEventPtr ( const Event *  ptr)

Definition at line 109 of file Profiling.cpp.

109 { return ptr;}

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ GetEventPtr() [2/2]

const Event* armnn::GetEventPtr ( const std::unique_ptr< Event > &  ptr)

Definition at line 110 of file Profiling.cpp.

110 {return ptr.get(); }

◆ GetFusedName()

void armnn::GetFusedName ( Layer *  layerList[4],
std::string &  fusedName 
)
inline

Definition at line 71 of file NeonBackendOptimizationUtils.hpp.

72 {
73  // Build the fused name string
74  fusedName = "fused";
75  for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76  {
77  if (! layerList[layerIdx])
78  {
79  break;
80  }
81  fusedName += "-";
82  fusedName += layerList[layerIdx]->GetNameStr();
83  }
84 }

References Layer::GetNameStr().

Referenced by NeonBackend::OptimizeSubgraphView().

◆ GetFusedTypeAsCString()

constexpr char const* armnn::GetFusedTypeAsCString ( FusedKernelType  type)
constexpr

Definition at line 119 of file TypesUtils.hpp.

120 {
121  switch (type)
122  {
123  case FusedKernelType::AddMulAdd: return "AddMulAdd";
124  default: return "Unknown";
125  }
126 }

References AddMulAdd.

Referenced by StringifyLayerParameters< FusedDescriptor >::Serialize().

◆ GetGraphForTesting()

Graph & GetGraphForTesting ( IOptimizedNetwork *  optNet)

Definition at line 49 of file TestUtils.cpp.

50 {
51  return optNet->pOptimizedNetworkImpl->GetGraph();
52 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetILayerSupportByBackendId()

LayerSupportHandle GetILayerSupportByBackendId ( const armnn::BackendId &  backend)

Convenience function to retrieve the LayerSupportHandle for a backend.

Definition at line 23 of file BackendHelper.cpp.

24 {
25  BackendRegistry& backendRegistry = armnn::BackendRegistryInstance();
26 
27  if (!backendRegistry.IsBackendRegistered(backend))
28  {
29  return LayerSupportHandle(nullptr);
30  }
31 
32  auto factoryFunc = backendRegistry.GetFactory(backend);
33  auto backendObject = factoryFunc();
34  return LayerSupportHandle(backendObject->GetLayerSupport(), backend);
35 }

References BackendRegistryInstance(), BackendRegistry::GetFactory(), and BackendRegistry::IsBackendRegistered().
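
A hedged usage sketch: query whether a backend supports a specific layer through the returned LayerSupportHandle. The backend id, tensor shape and the particular IsActivationSupported() query are illustrative choices only.

#include <armnn/BackendHelper.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <iostream>

int main()
{
    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(armnn::BackendId("CpuRef"));
    if (!handle.IsBackendRegistered())
    {
        std::cout << "CpuRef backend is not registered\n";
        return 0;
    }

    armnn::TensorInfo info({ 1, 16 }, armnn::DataType::Float32);
    armnn::ActivationDescriptor activation;
    activation.m_Function = armnn::ActivationFunction::ReLu;

    std::string reason;
    const bool supported = handle.IsActivationSupported(info, info, activation,
                                                        armnn::Optional<std::string&>(reason));
    std::cout << std::boolalpha << "ReLu activation supported on CpuRef: " << supported
              << (reason.empty() ? "" : " - " + reason) << "\n";
    return 0;
}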

◆ GetInputTensor()

const armnn::ConstTensor armnn::GetInputTensor ( const LayerBindingId  layerId,
const InputTensors &  inputTensors 
)

Definition at line 1460 of file LoadedNetwork.cpp.

1461 {
1462  for (auto inputTensorPair : inputTensors)
1463  {
1464  LayerBindingId id = inputTensorPair.first;
1465  if (id == layerId)
1466  {
1467  return inputTensorPair.second;
1468  }
1469  }
1470  throw InvalidArgumentException("Input does not exist.");
1471 }

◆ GetInputTensorData()

const DataType* armnn::GetInputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 42 of file RefWorkloadUtils.hpp.

43 {
44  const ITensorHandle* tensorHandle = data.m_Inputs[idx];
45  return reinterpret_cast<const DataType*>(tensorHandle->Map());
46 }

References ITensorHandle::Map().

◆ GetInputTensorDataBFloat16()

const BFloat16* armnn::GetInputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 86 of file RefWorkloadUtils.hpp.

87 {
88  return GetInputTensorData<BFloat16>(idx, data);
89 }

◆ GetInputTensorDataFloat()

const float* armnn::GetInputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 62 of file RefWorkloadUtils.hpp.

63 {
64  return GetInputTensorData<float>(idx, data);
65 }

◆ GetInputTensorDataHalf()

const Half* armnn::GetInputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 74 of file RefWorkloadUtils.hpp.

75 {
76  return GetInputTensorData<Half>(idx, data);
77 }

◆ GetLayerInOutDatatype()

std::vector<DataType> armnn::GetLayerInOutDatatype ( const Layer *  layer)
inline

Definition at line 1017 of file Network.cpp.

1018 {
1019  DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
1020  layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
1021  DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
1022  layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
1023  return {dataTypeIn, dataTypeOut};
1024 }

References Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetNumInputSlots(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), and OutputSlot::GetTensorInfo().

Referenced by AssignBackends(), and AssignBackendsIConnectable().

◆ GetLayerTypeAsCString()

◆ GetLogicalBinaryOperationAsCString()

constexpr char const* armnn::GetLogicalBinaryOperationAsCString ( LogicalBinaryOperation  operation)
constexpr

Definition at line 109 of file TypesUtils.hpp.

110 {
111  switch (operation)
112  {
113  case LogicalBinaryOperation::LogicalAnd: return "LogicalAnd";
114  case LogicalBinaryOperation::LogicalOr: return "LogicalOr";
115  default: return "Unknown";
116  }
117 }

References LogicalAnd, and LogicalOr.

◆ GetMemBlockStrategyTypeName()

constexpr const char* armnn::GetMemBlockStrategyTypeName ( MemBlockStrategyType  memBlockStrategyType)
constexpr

Definition at line 295 of file TypesUtils.hpp.

296 {
297  switch (memBlockStrategyType)
298  {
299  case MemBlockStrategyType::SingleAxisPacking: return "SingleAxisPacking";
300  case MemBlockStrategyType::MultiAxisPacking: return "MultiAxisPacking";
301  default: return "Unknown";
302  }
303 }

References MultiAxisPacking, and SingleAxisPacking.

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategy()

std::unique_ptr<IMemoryOptimizerStrategy> armnn::GetMemoryOptimizerStrategy ( const std::string &  strategyName)
inline

Definition at line 36 of file MemoryOptimizerStrategyLibrary.hpp.

37 {
38  const auto& strategyFactoryMap = GetStrategyFactories();
39  auto strategyFactory = strategyFactoryMap.find(strategyName);
40  if (strategyFactory != GetStrategyFactories().end())
41  {
42  return strategyFactory->second->CreateMemoryOptimizerStrategy();
43  }
44  return nullptr;
45 }

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategyNames()

const std::vector<std::string> armnn::GetMemoryOptimizerStrategyNames ( )
inline

Definition at line 47 of file MemoryOptimizerStrategyLibrary.hpp.

48 {
49  const auto& strategyFactoryMap = GetStrategyFactories();
50  std::vector<std::string> strategyNames;
51  for (const auto& strategyFactory : strategyFactoryMap)
52  {
53  strategyNames.emplace_back(strategyFactory.first);
54  }
55  return strategyNames;
56 }

◆ GetModelOptionsForTesting()

ModelOptions & GetModelOptionsForTesting ( IOptimizedNetwork *  optNet)

Definition at line 54 of file TestUtils.cpp.

55 {
56  return optNet->pOptimizedNetworkImpl->GetModelOptions();
57 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetNormalizationAlgorithmChannelAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmChannelAsCString ( NormalizationAlgorithmChannel  channel)
constexpr

Definition at line 265 of file TypesUtils.hpp.

266 {
267  switch (channel)
268  {
269  case NormalizationAlgorithmChannel::Across: return "Across";
270  case NormalizationAlgorithmChannel::Within: return "Within";
271  default: return "Unknown";
272  }
273 }

References Across, and Within.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNormalizationAlgorithmMethodAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmMethodAsCString ( NormalizationAlgorithmMethod  method)
constexpr

Definition at line 275 of file TypesUtils.hpp.

276 {
277  switch (method)
278  {
279  case NormalizationAlgorithmMethod::LocalBrightness: return "LocalBrightness";
280  case NormalizationAlgorithmMethod::LocalContrast: return "LocalContrast";
281  default: return "Unknown";
282  }
283 }

References LocalBrightness, and LocalContrast.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNumActivations()

unsigned int armnn::GetNumActivations ( const TensorInfo &  inputInfo)

Definition at line 16 of file RefFullyConnectedWorkload.cpp.

17 {
18  unsigned int numActivations = 1; // Total number of activations in the input.
19  for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
20  {
21  numActivations *= inputInfo.GetShape()[i];
22  }
23  return numActivations;
24 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

◆ GetNumberOfCacheFiles()

unsigned int GetNumberOfCacheFiles ( const armnn::BackendId &  backend)

Returns the number of cached files if backend supports caching.

Definition at line 130 of file BackendHelper.cpp.

131 {
132  auto const& backendRegistry = armnn::BackendRegistryInstance();
133  if (backendRegistry.IsBackendRegistered(backend))
134  {
135  auto factoryFunc = backendRegistry.GetFactory(backend);
136  auto backendObject = factoryFunc();
137  return backendObject->GetNumberOfCacheFiles();
138  }
139  return 0;
140 }

References BackendRegistryInstance().

Referenced by ArmnnDriver::getNumberOfCacheFilesNeeded(), ArmnnDriverImpl::PrepareArmnnModel(), and ArmnnDriverImpl::PrepareArmnnModelFromCache().

◆ GetNumInputs()

uint32_t armnn::GetNumInputs ( bool  biasEnabled)

Definition at line 455 of file Descriptors.cpp.

456 {
457  unsigned int numInputs = 2;
458  if (biasEnabled)
459  {
460  numInputs = 3;
461  }
462  return numInputs;
463 }

Referenced by FullyConnectedDescriptor::GetNumInputs(), Convolution2dDescriptor::GetNumInputs(), Convolution3dDescriptor::GetNumInputs(), and DepthwiseConvolution2dDescriptor::GetNumInputs().

◆ GetOffset()

unsigned int armnn::GetOffset ( const TensorShape &  shape,
unsigned int  b,
unsigned int  h,
unsigned int  w,
unsigned int  c,
const DataLayoutIndexed &  dataLayout 
)

Definition at line 15 of file SpaceToBatchNd.cpp.

21 {
22  // 3D Tensors
23  unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
24  if (shape.GetNumDimensions() == 3)
25  {
26  return (b * shape[dataLayout.GetHeightIndex()] + h) * shape[channelDimension3D] + c;
27  }
28  // 4D Tensors
29  else if (shape.GetNumDimensions() == 4)
30  {
31  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
32  {
33  return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
34  shape[dataLayout.GetChannelsIndex()] + c;
35  }
36  else
37  {
38  return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
39  shape[dataLayout.GetWidthIndex()] + w;
40  }
41  }
42  else
43  {
44  throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
45  }
46 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by SpaceToBatchNd(), and SpaceToDepth().
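
As a worked example, for an NHWC tensor of shape [1, 4, 4, 3] the element at (b=0, h=2, w=1, c=2) maps to linear index ((0*4 + 2)*4 + 1)*3 + 2 = 29, while the same coordinates in an NCHW tensor of shape [1, 3, 4, 4] map to ((0*3 + 2)*4 + 2)*4 + 1 = 41.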

◆ GetOutputShapeRoundingAsCString()

constexpr char const* armnn::GetOutputShapeRoundingAsCString ( OutputShapeRounding  rounding)
constexpr

Definition at line 139 of file TypesUtils.hpp.

140 {
141  switch (rounding)
142  {
143  case OutputShapeRounding::Ceiling: return "Ceiling";
144  case OutputShapeRounding::Floor: return "Floor";
145  default: return "Unknown";
146  }
147 }

References Ceiling, and Floor.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetOutputTensor()

const armnn::Tensor armnn::GetOutputTensor ( const LayerBindingId  layerId,
const OutputTensors &  outputTensors 
)

Definition at line 1473 of file LoadedNetwork.cpp.

1474 {
1475  for (auto outputTensorPair : outputTensors)
1476  {
1477  LayerBindingId id = outputTensorPair.first;
1478  if (id == layerId)
1479  {
1480  return outputTensorPair.second;
1481  }
1482  }
1483  throw InvalidArgumentException("Output does not exist.");
1484 }

◆ GetOutputTensorData() [1/2]

DataType* armnn::GetOutputTensorData ( ITensorHandle *  tensorHandle)

Definition at line 56 of file RefWorkloadUtils.hpp.

57 {
58  return reinterpret_cast<DataType*>(tensorHandle->Map());
59 }

References ITensorHandle::Map().

◆ GetOutputTensorData() [2/2]

DataType * GetOutputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 180 of file ClWorkloadUtils.hpp.

181 {
182  ITensorHandle* tensorHandle = data.m_Outputs[idx];
183  return reinterpret_cast<DataType*>(tensorHandle->Map());
184 }

References ITensorHandle::Map().

◆ GetOutputTensorDataBFloat16()

BFloat16* armnn::GetOutputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 92 of file RefWorkloadUtils.hpp.

93 {
94  return GetOutputTensorData<BFloat16>(idx, data);
95 }

◆ GetOutputTensorDataFloat()

float* armnn::GetOutputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 68 of file RefWorkloadUtils.hpp.

69 {
70  return GetOutputTensorData<float>(idx, data);
71 }

◆ GetOutputTensorDataHalf()

Half* armnn::GetOutputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 80 of file RefWorkloadUtils.hpp.

81 {
82  return GetOutputTensorData<Half>(idx, data);
83 }

◆ GetPaddingMethodAsCString()

constexpr char const* armnn::GetPaddingMethodAsCString ( PaddingMethod  method)
constexpr

Definition at line 149 of file TypesUtils.hpp.

150 {
151  switch (method)
152  {
153  case PaddingMethod::Exclude: return "Exclude";
154  case PaddingMethod::IgnoreValue: return "IgnoreValue";
155  default: return "Unknown";
156  }
157 }

References Exclude, and IgnoreValue.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetPaddingModeAsCString()

constexpr char const* armnn::GetPaddingModeAsCString ( PaddingMode  mode)
constexpr

Definition at line 159 of file TypesUtils.hpp.

160 {
161  switch (mode)
162  {
163  case PaddingMode::Constant: return "Exclude";
164  case PaddingMode::Symmetric: return "Symmetric";
165  case PaddingMode::Reflect: return "Reflect";
166  default: return "Unknown";
167  }
168 }

References Constant, Reflect, and Symmetric.

Referenced by StringifyLayerParameters< PadDescriptor >::Serialize().

◆ GetPoolingAlgorithmAsCString()

constexpr char const* armnn::GetPoolingAlgorithmAsCString ( PoolingAlgorithm  pooling)
constexpr

Definition at line 128 of file TypesUtils.hpp.

129 {
130  switch (pooling)
131  {
132  case PoolingAlgorithm::Average: return "Average";
133  case PoolingAlgorithm::Max: return "Max";
134  case PoolingAlgorithm::L2: return "L2";
135  default: return "Unknown";
136  }
137 }

References Average, L2, and Max.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetProfilingService()

arm::pipe::IProfilingService & GetProfilingService ( armnn::RuntimeImpl *  runtime)

Definition at line 59 of file TestUtils.cpp.

60 {
61  return *(runtime->m_ProfilingService.get());
62 }

◆ GetReduceOperationAsCString()

constexpr char const* armnn::GetReduceOperationAsCString ( ReduceOperation  reduce_operation)
constexpr

Definition at line 170 of file TypesUtils.hpp.

171 {
172  switch (reduce_operation)
173  {
174  case ReduceOperation::Sum: return "Sum";
175  case ReduceOperation::Max: return "Max";
176  case ReduceOperation::Mean: return "Mean";
177  case ReduceOperation::Min: return "Min";
178  case ReduceOperation::Prod: return "Prod";
179  default: return "Unknown";
180  }
181 }

References Max, Mean, Min, Prod, and Sum.

Referenced by ConvertReduceToTosaOperator(), and StringifyLayerParameters< ReduceDescriptor >::Serialize().

◆ GetResizeMethodAsCString()

constexpr const char* armnn::GetResizeMethodAsCString ( ResizeMethod  method)
constexpr

Definition at line 285 of file TypesUtils.hpp.

286 {
287  switch (method)
288  {
289  case ResizeMethod::Bilinear: return "Bilinear";
290  case ResizeMethod::NearestNeighbor: return "NearestNeighbour";
291  default: return "Unknown";
292  }
293 }

References Bilinear, and NearestNeighbor.

Referenced by StringifyLayerParameters< ResizeDescriptor >::Serialize().

◆ GetSizePrefixedClContext()

const armnn::ClContext* armnn::GetSizePrefixedClContext ( const void *  buf)
inline

Definition at line 144 of file ClContextSchema_generated.h.

144  {
145  return flatbuffers::GetSizePrefixedRoot<armnn::ClContext>(buf);
146 }

◆ GetStatusAsCString()

constexpr char const* armnn::GetStatusAsCString ( Status  status)
constexpr

Definition at line 21 of file TypesUtils.hpp.

22 {
23  switch (status)
24  {
25  case armnn::Status::Success: return "Status::Success";
26  case armnn::Status::Failure: return "Status::Failure";
27  default: return "Unknown";
28  }
29 }

References Failure, and Success.

Referenced by operator<<().

◆ GetTensorInfo()

const TensorInfo& armnn::GetTensorInfo ( const ITensorHandle *  tensorHandle)
inline

Retrieves the TensorInfo from a reference backend tensor handle (the reference workloads use RefTensorHandle-derived handles for their inputs and outputs).

Definition at line 33 of file RefWorkloadUtils.hpp.

34 {
35  // We know that reference workloads use RefTensorHandles for inputs and outputs
36  const TensorHandleType* refTensorHandle =
37  PolymorphicDowncast<const TensorHandleType*>(tensorHandle);
38  return refTensorHandle->GetTensorInfo();
39 }

Referenced by BatchNormImpl(), Concatenate(), ExecuteFunction(), Split(), Splitter(), FillLayer::ValidateTensorShapesFromInputs(), SwitchLayer::ValidateTensorShapesFromInputs(), ConstantLayer::ValidateTensorShapesFromInputs(), DetectionPostProcessLayer::ValidateTensorShapesFromInputs(), SplitterLayer::ValidateTensorShapesFromInputs(), LstmLayer::ValidateTensorShapesFromInputs(), QuantizedLstmLayer::ValidateTensorShapesFromInputs(), and QLstmLayer::ValidateTensorShapesFromInputs().

◆ GetTimeDuration()

std::chrono::duration<double, std::milli> armnn::GetTimeDuration ( std::chrono::high_resolution_clock::time_point  start_time)
inline

Definition at line 19 of file Timer.hpp.

21 {
22  return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
23 }

References GetTimeNow().

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), and RuntimeImpl::~RuntimeImpl().

◆ GetTimeNow()

std::chrono::high_resolution_clock::time_point armnn::GetTimeNow ( )
inline

Definition at line 14 of file Timer.hpp.

15 {
16  return std::chrono::high_resolution_clock::now();
17 }

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), GetTimeDuration(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().
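
A minimal timing sketch built from the two helpers above; the work being timed is a placeholder, and Timer.hpp is an internal ArmNN header, so the include path is indicative only.

#include <Timer.hpp> // internal header: src/armnn/Timer.hpp
#include <iostream>

void TimeWork()
{
    const auto start = armnn::GetTimeNow();
    // ... run an inference or any other work to be measured ...
    std::cout << "Elapsed: " << armnn::GetTimeDuration(start).count() << " ms\n";
}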

◆ GetUnaryOperationAsCString()

constexpr char const* armnn::GetUnaryOperationAsCString ( UnaryOperation  operation)
constexpr

Definition at line 92 of file TypesUtils.hpp.

93 {
94  switch (operation)
95  {
96  case UnaryOperation::Abs: return "Abs";
97  case UnaryOperation::Ceil: return "Ceil";
98  case UnaryOperation::Exp: return "Exp";
99  case UnaryOperation::Sqrt: return "Sqrt";
100  case UnaryOperation::Rsqrt: return "Rsqrt";
101  case UnaryOperation::Neg: return "Neg";
102  case UnaryOperation::Log: return "Log";
103  case UnaryOperation::LogicalNot: return "LogicalNot";
104  case UnaryOperation::Sin: return "Sin";
105  default: return "Unknown";
106  }
107 }

References Abs, Ceil, Exp, Log, LogicalNot, Neg, Rsqrt, Sin, and Sqrt.

Referenced by StringifyLayerParameters< ElementwiseUnaryDescriptor >::Serialize().

◆ GetUnpaddedTensorStrides()

TensorShape GetUnpaddedTensorStrides ( const TensorInfo &  tensorInfo)

Definition at line 15 of file TensorHandle.cpp.

16 {
17  TensorShape shape(tensorInfo.GetShape());
18  auto size = GetDataTypeSize(tensorInfo.GetDataType());
19  auto runningSize = size;
20  std::vector<unsigned int> strides(shape.GetNumDimensions());
21  auto lastIdx = shape.GetNumDimensions()-1;
22  for (unsigned int i=0; i < lastIdx ; i++)
23  {
24  strides[lastIdx-i] = runningSize;
25  runningSize *= shape[lastIdx-i];
26  }
27  strides[0] = runningSize;
28  return TensorShape(shape.GetNumDimensions(), strides.data());
29 }

References TensorInfo::GetDataType(), GetDataTypeSize(), TensorShape::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by TosaRefTensorHandle::GetStrides(), SampleTensorHandle::GetStrides(), RefTensorHandle::GetStrides(), ConstTensorHandle::GetStrides(), and RefTensorHandleDecorator::GetStrides().
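
As a worked example, for a Float32 tensor of shape [2, 3, 4] the returned strides are [48, 16, 4] bytes: 4 bytes to step along the innermost dimension, 16 bytes (4 elements) per row, and 48 bytes (12 elements) per 3x4 plane.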

◆ GetVersion()

const std::string GetVersion ( )

Definition at line 81 of file Utils.cpp.

82 {
83  return ARMNN_VERSION;
84 }

References ARMNN_VERSION.
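
A one-line usage sketch; the value printed depends on the ARMNN_VERSION the library was built with.

#include <armnn/Utils.hpp>
#include <iostream>

int main()
{
    std::cout << "ArmNN version: " << armnn::GetVersion() << "\n";
    return 0;
}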

◆ GpuFsaActivationCreateOp()

void GpuFsaActivationCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ActivationDescriptor &  descriptor 
)

Definition at line 58 of file GpuFsaActivation.cpp.

61 {
62  GpuWorkloadSketch* sketch = blob->sketch.get();
63  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
64  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
65  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
66 
67  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
68 
69  aclInput0Info.set_are_values_constant(input.IsConstant());
70 
71  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
72 
73  // Validate operator, check status and update reasonIfUnsupported
74  arm_compute::Status aclStatus{};
75  switch (descriptor.m_Function)
76  {
77  case ActivationFunction::TanH:
78  {
79  aclStatus = GpuTanh::validate_op(*sketch, inputTensorInfos[0]);
80  break;
81  }
82  case ActivationFunction::Sigmoid:
83  {
84  aclStatus = GpuSigmoid::validate_op(*sketch, inputTensorInfos[0]);
85  break;
86  }
87  default:
88  throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
89  + GetActivationFunctionAsCString(descriptor.m_Function));
90 
91  }
92  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
93  if (!supported)
94  {
95  throw BackendCapabilityException("\"GpuFsa\" backend failed during Activation layer validation");
96  }
97 
98  arm_compute::ITensorInfo* activationOutputInfo{};
99  switch (descriptor.m_Function)
100  {
101  case ActivationFunction::TanH:
102  {
103  activationOutputInfo = GpuTanh::create_op(*sketch, inputTensorInfos[0]);
104  break;
105  }
106  case ActivationFunction::Sigmoid:
107  {
108  activationOutputInfo = GpuSigmoid::create_op(*sketch, inputTensorInfos[0]);
109  break;
110  }
111  default:
112  throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
113  + GetActivationFunctionAsCString(descriptor.m_Function));
114 
115  }
116 
 117  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
118  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
119  GpuOutput::create_op(*sketch, activationOutputInfo, outputTensorInfos[0]);
120 
121  // Store the TensorInfos within the blob as unique_ptrs to be used later
122  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
123  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
124 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaActivationValidate()

arm_compute::Status GpuFsaActivationValidate ( const TensorInfo &  input,
const ActivationDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaActivation.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32 
33  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
34 
35  switch (descriptor.m_Function)
36  {
37  case ActivationFunction::TanH:
38  {
39  if ( descriptor.m_A != 1 || descriptor.m_B != 1)
40  {
41  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
42  "Activation function TanH only works with a=1 and b=1");
43  }
44  return GpuTanh::validate_op(sketch, inputInfo);
45  }
46  case ActivationFunction::Sigmoid:
47  {
48  return GpuSigmoid::validate_op(sketch, inputInfo);
49  }
50  default:
51  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
52  std::string("Activation function currently not supported in GpuFsa: ")
53  + GetActivationFunctionAsCString(descriptor.m_Function));
54  }
55 
56 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaBackendId()

constexpr const char* armnn::GpuFsaBackendId ( )
constexpr

Definition at line 10 of file GpuFsaBackendId.hpp.

10 { return "GpuFsa"; }

Referenced by GpuFsaBackend::GetIdStatic().

◆ GpuFsaBatchMatMulCreateOp()

void GpuFsaBatchMatMulCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input0,
const TensorInfo &  input1,
const BatchMatMulDescriptor &  descriptor 
)

Definition at line 51 of file GpuFsaBatchMatMul.cpp.

55 {
56  GpuWorkloadSketch* sketch = blob->sketch.get();
57  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
58  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
59  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
60 
61  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
62  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
63 
64  aclInput0Info.set_are_values_constant(input0.IsConstant());
65  aclInput1Info.set_are_values_constant(input1.IsConstant());
66 
67  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
68  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
69 
70  MatMulAttributes matMulAttributes{};
71  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
72  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
73  GpuMatMulSettings matmulSettings{};
74  matmulSettings.m0(1);
75  matmulSettings.n0(1);
76  matmulSettings.k0(1);
77 
78  // Validate operator, check status and update reasonIfUnsupported
79  arm_compute::Status aclStatus = GpuMatMul::validate_op(*sketch,
80  inputTensorInfos[0],
81  inputTensorInfos[1],
82  matMulAttributes,
83  matmulSettings);
84 
85  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
86  if (!supported)
87  {
 88  throw BackendCapabilityException("\"GpuFsa\" backend failed during batch matmul validation");
89  }
90 
91  arm_compute::ITensorInfo* addOutputInfo = GpuMatMul::create_op(*sketch,
92  inputTensorInfos[0],
93  inputTensorInfos[1],
94  matMulAttributes,
95  matmulSettings);
96 
 97  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
98  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
99  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
100 
101  // Store the TensorInfos within the blob as unique_ptrs to be used later
102  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
103  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
104 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaBatchMatMulValidate()

arm_compute::Status GpuFsaBatchMatMulValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const BatchMatMulDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaBatchMatMul.cpp.

25 {
26  // Create a new workload sketch, for validation purposes
27  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
28  auto workloadContext = GpuWorkloadContext(&compileCtx);
29  GpuWorkloadSketch sketch{ &workloadContext };
30 
31  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
32  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
33 
34  aclInput0Info.set_are_values_constant(input0.IsConstant());
35  aclInput1Info.set_are_values_constant(input1.IsConstant());
36 
37  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
38  arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
39 
40  MatMulAttributes matMulAttributes{};
41  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
42  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
43  GpuMatMulSettings matmulSettings{};
44  matmulSettings.m0(1);
45  matmulSettings.n0(1);
46  matmulSettings.k0(1);
47 
48  return GpuMatMul::validate_op(sketch, inputInfo0, inputInfo1, matMulAttributes, matmulSettings);
49 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaCastCreateOp()

void GpuFsaCastCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 61 of file GpuFsaCast.cpp.

64 {
65  using namespace armcomputetensorutils;
66 
67  GpuWorkloadSketch* sketch = blob->sketch.get();
68  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
69  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
70  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
71 
72  arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
73 
74  aclinputInfo.set_are_values_constant(input.IsConstant());
75 
76  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclinputInfo));
77 
78  CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
79 
80  // Validate operator, check status and update reasonIfUnsupported
81  arm_compute::Status aclStatus = GpuCast::validate_op(*sketch, inputTensorInfos[0], cast_attr);
82  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
83  if (!validated)
84  {
85  throw BackendCapabilityException("\"" + std::string(GpuFsaBackendId())
86  + "\" backend failed during cast operator validation");
87  }
88 
89  arm_compute::ITensorInfo* castOutputInfo =
90  GpuCast::create_op(*sketch, inputTensorInfos[0], cast_attr);
91 
 92  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
93  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
94  GpuOutput::create_op(*sketch, castOutputInfo, outputTensorInfos[0]);
95 
96  // Store the TensorInfos within the blob as unique_ptrs to be used later
97  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
98  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
99 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaCastValidate()

arm_compute::Status GpuFsaCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 33 of file GpuFsaCast.cpp.

34 {
35  using namespace armcomputetensorutils;
36 
37  // Create a new workload sketch, for validation purposes
38  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
39  auto workloadContext = GpuWorkloadContext(&compileCtx);
40  GpuWorkloadSketch sketch{ &workloadContext };
41 
42  arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
43 
44  aclinputInfo.set_are_values_constant(input.IsConstant());
45 
46  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclinputInfo);
47 
48  CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
49 
50  arm_compute::Status aclStatus = GpuCast::validate_op(sketch, inputInfo0, cast_attr);
51 #ifndef NDEBUG
52  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
53  if (!validated)
54  {
55  std::cout << "GpuFsaCastValidate failed: " << aclStatus.error_description() << std::endl;
56  }
57 #endif
58  return aclStatus;
59 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaConstantWorkloadValidate()

arm_compute::Status GpuFsaConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 17 of file GpuFsaConstantWorkload.cpp.

18 {
19  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  std::array<arm_compute::DataType,8> supportedTypes = {
22  arm_compute::DataType::F16,
23  arm_compute::DataType::F32,
24  arm_compute::DataType::QASYMM8,
25  arm_compute::DataType::QASYMM8_SIGNED,
26  arm_compute::DataType::QSYMM16,
27  arm_compute::DataType::QSYMM8,
28  arm_compute::DataType::QSYMM8_PER_CHANNEL,
29  arm_compute::DataType::S32
30  };
31  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
32 
33  if (it != end(supportedTypes))
34  {
35  return arm_compute::Status{};
36  }
37  else
38  {
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
40  }
41 }

◆ GpuFsaConvolution2dCreateOp()

void GpuFsaConvolution2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 70 of file GpuFsaConvolution2d.cpp.

75 {
76 /*
77  * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
78  * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
 79  * GpuWorkloadContext: this contains the TensorInfos and is unique to the Graph being executed.
 80  * Sketch: this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
 81  * using a single sketch.
 82  * The inputTensorInfos / outputTensorInfos: these are pointers to the TensorInfos used when creating the sketch.
83  * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
84  * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
85  * doesn't know which Tensors to use.
86  */
87  GpuWorkloadSketch* sketch = blob->sketch.get();
88  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
89  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
90  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
91 
92  // Build and create tensor infos using the sketch
93  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
94  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
95  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
96 
97  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
98  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
99 
100  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op / create_op
101  arm_compute::TensorInfo aclBiasInfo;
102  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
103 
104  if (descriptor.m_BiasEnabled)
105  {
106  if(!biases.has_value())
107  {
108  throw InvalidArgumentException("GpuFsaConvolution2d::CreateOp: No biases set when biases are enabled");
109  }
110  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
111  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
112 
113  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
114  biasSketchInfoPtr = inputTensorInfos[2];
115  }
116 
117  Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
118 
119  // Validate operator, check status and update reasonIfUnsupported
120  arm_compute::Status aclStatus = GpuConv2d::validate_op(*sketch,
121  inputTensorInfos[0],
122  inputTensorInfos[1],
123  biasSketchInfoPtr,
124  conv2dAttributes);
125 
126  const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
127  if (!supported)
128  {
129  throw BackendCapabilityException("\"GpuFsa\" backend failed during Convolution2D operation validation");
130  }
131 
132  // Create the Op within the Sketch using the TensorInfos we have stored
133  arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(*sketch,
134  inputTensorInfos[0],
135  inputTensorInfos[1],
136  biasSketchInfoPtr,
137  conv2dAttributes);
138 
139  // Create the Output
140  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
141  GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
142 
143  // Store the TensorInfos within the blob as unique_ptrs to be used later
144  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
145  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
146 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaConvolution2dValidate()

arm_compute::Status GpuFsaConvolution2dValidate ( const TensorInfo &  input,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 24 of file GpuFsaConvolution2d.cpp.

28 {
29  // Create a new workload sketch, for validation purposes
30  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
31  auto workloadContext = GpuWorkloadContext(&compileCtx);
32  GpuWorkloadSketch sketch{ &workloadContext };
33 
34  // Build and create tensor infos using the sketch
35  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
36  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
37  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
38 
39  auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
40  auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
41 
42  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
43  arm_compute::TensorInfo aclBiasInfo;
44  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
45 
46  if (descriptor.m_BiasEnabled)
47  {
48  if(!biases.has_value())
49  {
50  throw InvalidArgumentException("GpuFsaConvolution2d::ValidateOp: No biases set when biases are enabled");
51  }
52  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
53  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
54 
55  biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
56  }
57 
58  Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
59 
60  // Validate operator, check status and update reasonIfUnsupported
61  arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch,
62  inputInfo,
63  weightInfo,
64  biasSketchInfoPtr,
65  conv2dAttributes);
66 
67  return aclStatus;
68 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().
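
A hedged sketch of calling the validator directly, in the style of GpuFsaLayerSupport::IsLayerSupported(); the shapes, descriptor values and the GpuFsaConvolution2d.hpp include path are illustrative assumptions.

// Sketch only: GpuFsaConvolution2d.hpp is an internal GpuFsa backend header.
#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>

bool IsConv2dSupportedOnGpuFsa()
{
    // NHWC input [1, 8, 8, 3] and OHWI weights [16, 3, 3, 3], no bias -- example values.
    armnn::TensorInfo input({ 1, 8, 8, 3 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 16, 3, 3, 3 }, armnn::DataType::Float32, 0.0f, 0, true);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_BiasEnabled = false;

    const arm_compute::Status status =
        armnn::GpuFsaConvolution2dValidate(input, descriptor, weights, armnn::EmptyOptional());
    return status.error_code() == arm_compute::ErrorCode::OK;
}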

◆ GpuFsaDepthwiseConvolution2dCreateOp()

void GpuFsaDepthwiseConvolution2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 89 of file GpuFsaDepthwiseConvolution2d.cpp.

94 {
95 /*
96 * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
97 * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
 98 * GpuWorkloadContext: this contains the TensorInfos and is unique to the Graph being executed.
 99 * Sketch: this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
 100 * using a single sketch.
 101 * The inputTensorInfos / outputTensorInfos: these are pointers to the TensorInfos used when creating the sketch.
102 * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
103 * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
104 * doesn't know which Tensors to use.
105 */
106  GpuWorkloadSketch* sketch = blob->sketch.get();
107  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
108  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
109  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
110 
111  // Build and create tensor infos using the sketch
112  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
113 
114  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
115  //
116  // ACL format for weights for depthwise is:
117  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
118  // - [1, C, H, W] for [N, C, H, W] input/output layout
119  //
120  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
121  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
122  // so we do the permute here for the TensorInfo weights.
123  unsigned int aclDepthMultiplier;
124  TensorInfo weightsPermuted;
125  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
126  auto weightsShape = weightsPermuted.GetShape();
127  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
128 
129  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
130  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
131 
132  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
133  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
134 
135  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
136  arm_compute::TensorInfo aclBiasInfo;
137  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
138 
139  if (descriptor.m_BiasEnabled)
140  {
141  if(!biases.has_value())
142  {
143  throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
144  }
145  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
146  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
147 
148  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
149  biasSketchInfoPtr = inputTensorInfos[2];
150  }
151 
152  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
153 
154  // Validate operator, check status and update reasonIfUnsupported
155  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
156  inputTensorInfos[0],
157  inputTensorInfos[1],
158  biasSketchInfoPtr,
159  depthwiseConv2dAttributes);
160 
161  const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
162  if (!supported)
163  {
164  throw BackendCapabilityException(
165  "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
166  }
167 
168  // Create the Op within the Sketch using the TensorInfos we have stored
169  arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
170  inputTensorInfos[0],
171  inputTensorInfos[1],
172  biasSketchInfoPtr,
173  depthwiseConv2dAttributes);
174 
175  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
176  GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
177 
178  // Store the TensorInfos within the blob as unique_ptrs to be used later
179  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
180  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
181 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaDepthwiseConvolution2dValidate()

arm_compute::Status GpuFsaDepthwiseConvolution2dValidate ( const TensorInfo &  input,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file GpuFsaDepthwiseConvolution2d.cpp.

30 {
31  // Create a new workload sketch, for validation purposes
32  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
33  auto workloadContext = GpuWorkloadContext(&compileCtx);
34  GpuWorkloadSketch sketch{ &workloadContext };
35 
36  // Build and create tensor infos using the sketch
37  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
38 
39  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40  //
41  // ACL format for weights for depthwise is:
42  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43  // - [1, C, H, W] for [N, C, H, W] input/output layout
44  //
45  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47  // so we do the permute here for the TensorInfo weights.
48  unsigned int aclDepthMultiplier;
49  TensorInfo weightsPermuted;
50  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
51  auto weightsShape = weightsPermuted.GetShape();
52  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
53 
54  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
55  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
56 
57  auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
58  auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
59 
60  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
61  arm_compute::TensorInfo aclBiasInfo;
62  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
63 
64  if (descriptor.m_BiasEnabled)
65  {
66  if(!biases.has_value())
67  {
68  throw InvalidArgumentException(
69  "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
70  }
71  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
72  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
73 
74  biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
75  }
76 
77  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
78 
79  // Validate operator, check status and update reasonIfUnsupported
80  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
81  inputInfo,
82  weightInfo,
83  biasSketchInfoPtr,
84  depthwiseConv2dAttributes);
85 
86  return aclStatus;
87 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaElementwiseBinaryCreateOp()

void GpuFsaElementwiseBinaryCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input0,
const TensorInfo &  input1,
const ElementwiseBinaryDescriptor &  descriptor 
)

Definition at line 63 of file GpuFsaElementwiseBinary.cpp.

67 {
68  GpuWorkloadSketch* sketch = blob->sketch.get();
69  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
70  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
71  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
72 
73  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
74  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
75 
76  aclInput0Info.set_are_values_constant(input0.IsConstant());
77  aclInput1Info.set_are_values_constant(input1.IsConstant());
78 
79  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
80  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
81 
 82  // Validate operator, check status and update reasonIfUnsupported
84  arm_compute::Status aclStatus{};
85  switch (descriptor.m_Operation)
86  {
87  case BinaryOperation::Add:
88  {
89  aclStatus = GpuAdd::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
90  break;
91  }
92  case BinaryOperation::Mul:
93  {
94  aclStatus = GpuMul::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
95  break;
96  }
97  case BinaryOperation::Sub:
98  {
99  aclStatus = GpuSub::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
100  break;
101  }
102  default:
103  throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
104  + GetBinaryOperationAsCString(descriptor.m_Operation));
105  }
106 
107  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
108  if (!supported)
109  {
 110  throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary validation");
111  }
112 
113  arm_compute::ITensorInfo* elementwiseBinaryOutputInfo{};
114  switch (descriptor.m_Operation)
115  {
116  case BinaryOperation::Add:
117  {
118  elementwiseBinaryOutputInfo = GpuAdd::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
119  break;
120  }
121  case BinaryOperation::Mul:
122  {
123  elementwiseBinaryOutputInfo = GpuMul::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
124  break;
125  }
126  case BinaryOperation::Sub:
127  {
128  elementwiseBinaryOutputInfo = GpuSub::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
129  break;
130  }
131  default:
132  throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
133  + GetBinaryOperationAsCString(descriptor.m_Operation));
134  }
135 
 136  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
137  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
138  GpuOutput::create_op(*sketch, elementwiseBinaryOutputInfo, outputTensorInfos[0]);
139 
140  // Store the TensorInfos within the blob as unique_ptrs to be used later
141  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
142  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
143 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaElementwiseBinaryValidate()

arm_compute::Status GpuFsaElementwiseBinaryValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const ElementwiseBinaryDescriptor &  descriptor 
)

Definition at line 24 of file GpuFsaElementwiseBinary.cpp.

27 {
28  // Create a new workload sketch, for validation purposes
29  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
30  auto workloadContext = GpuWorkloadContext(&compileCtx);
31  GpuWorkloadSketch sketch{ &workloadContext };
32 
33  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
34  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
35 
36  aclInput0Info.set_are_values_constant(input0.IsConstant());
37  aclInput1Info.set_are_values_constant(input1.IsConstant());
38 
39  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
40  arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
41 
42  switch (descriptor.m_Operation)
43  {
44  case BinaryOperation::Add:
45  {
46  return GpuAdd::validate_op(sketch, inputInfo0, inputInfo1);
47  }
48  case BinaryOperation::Mul:
49  {
50  return GpuMul::validate_op(sketch, inputInfo0, inputInfo1);
51  }
52  case BinaryOperation::Sub:
53  {
54  return GpuSub::validate_op(sketch, inputInfo0, inputInfo1);
55  }
56  default:
57  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
58  std::string("Elementwise Binary operation not supported in GpuFsa: ")
59  + GetBinaryOperationAsCString(descriptor.m_Operation));
60  }
61 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaPooling2dCreateOp()

void GpuFsaPooling2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 40 of file GpuFsaPooling2d.cpp.

43 {
44  GpuWorkloadSketch* sketch = blob->sketch.get();
45  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
46  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
47  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
48 
49  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
50  aclInputInfo.set_are_values_constant(input.IsConstant());
51 
52  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
53 
54  Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
55  GpuPool2dSettings pool2dSettings{};
56 
57  // Validate operator, check status and update reasonIfUnsupported
58  arm_compute::Status aclStatus = GpuPool2d::validate_op(*sketch,
59  inputTensorInfos[0],
60  pool2dAttributes,
61  pool2dSettings);
62 
63  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
64  if (!supported)
65  {
66  throw BackendCapabilityException("\"GpuFsa\" backend failed during pooling 2d validation");
67  }
68 
69  arm_compute::ITensorInfo* addOutputInfo = GpuPool2d::create_op(*sketch,
70  inputTensorInfos[0],
71  pool2dAttributes,
72  pool2dSettings);
73 
 74  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created.
75  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
76  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
77 
78  // Store the TensorInfos within the blob as unique_ptrs to be used later
79  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
80  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
81 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaPooling2dValidate()

arm_compute::Status GpuFsaPooling2dValidate ( const TensorInfo &  input,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaPooling2d.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
35  GpuPool2dSettings pool2dSettings{};
36 
37  return GpuPool2d::validate_op(sketch, inputInfo, pool2dAttributes, pool2dSettings);
38 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaPreCompiledWorkloadValidate()

bool armnn::GpuFsaPreCompiledWorkloadValidate ( std::string *  reasonIfUnsupported)

◆ GpuFsaReshapeCreateOp()

void GpuFsaReshapeCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ReshapeDescriptor &  descriptor 
)

Definition at line 49 of file GpuFsaReshape.cpp.

50 {
51  GpuWorkloadSketch* sketch = blob->sketch.get();
52  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
53 
54  std::vector<arm_compute::ITensorInfo*> inputTensorInfos;
55  std::vector<arm_compute::ITensorInfo*> outputTensorInfos;
56 
57  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
58 
59  aclInputInfo.set_are_values_constant(input.IsConstant());
60 
61  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
62 
63  ReshapeAttributes attributes;
64  attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
65 
66  arm_compute::ITensorInfo* addOutputInfo = GpuReshape::create_op(*sketch, inputTensorInfos[0], attributes);
67 
 68  // Temporary fix until a fusing attempt is made for the GpuFsa backend and an Output layer workload is created
69  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
70  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
71 
72  // Store the tensorInfos within the blob as std::unique_ptr<> so they can be used later
73  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
74  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
75 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaReshapeValidate()

arm_compute::Status GpuFsaReshapeValidate ( const TensorInfo &  input,
const ReshapeDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaReshape.cpp.

23 {
24  auto compileContext = arm_compute::CLKernelLibrary::get().get_compile_context();
25  auto workloadContext = GpuWorkloadContext(&compileContext);
26 
27  GpuWorkloadSketch sketch(&workloadContext);
28 
29  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
30  aclInputInfo.set_are_values_constant(input.IsConstant());
31 
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  ReshapeAttributes attributes;
35  attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
36 
37  arm_compute::Status aclStatus = GpuReshape::validate_op(sketch, inputInfo, attributes);
38 
39 #ifndef NDEBUG
40  if (aclStatus.error_code() != arm_compute::ErrorCode::OK)
41  {
42  std::cout << "GpuFsaReshapeValidate failed: " << aclStatus.error_description() << std::endl;
43  }
44 #endif
45 
46  return aclStatus;
47 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaResizeCreateOp()

void GpuFsaResizeCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ResizeDescriptor &  descriptor 
)

Definition at line 39 of file GpuFsaResize.cpp.

42 {
43  GpuWorkloadSketch* sketch = blob->sketch.get();
44  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
45  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
46  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
47 
48  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
49  aclInputInfo.set_are_values_constant(input.IsConstant());
50 
51  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
52 
53  ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
54 
55  // Validate operator, check status and update reasonIfUnsupported
56  arm_compute::Status aclStatus = GpuResize::validate_op(*sketch,
57  inputTensorInfos[0],
58  resizeAttributes);
59 
60  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
61  if (!supported)
62  {
63  throw BackendCapabilityException("\"GpuFsa\" backend failed during resize validation");
64  }
65 
66  arm_compute::ITensorInfo* addOutputInfo = GpuResize::create_op(*sketch,
67  inputTensorInfos[0],
68  resizeAttributes);
69 
70  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
71  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
72  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
73 
74  // Store the TensorInfos within the blob as unique_ptrs to be used later
75  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
76  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
77 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaResizeValidate()

arm_compute::Status GpuFsaResizeValidate ( const TensorInfo &  input,
const ResizeDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaResize.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
35 
36  return GpuResize::validate_op(sketch, inputInfo, resizeAttributes);
37 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaSoftmaxCreateOp()

void GpuFsaSoftmaxCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 63 of file GpuFsaSoftmax.cpp.

67 {
68  GpuWorkloadSketch* sketch = blob->sketch.get();
69  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
70  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
71  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
72 
73  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
74  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
75  aclInputInfo.set_are_values_constant(input.IsConstant());
76  aclOutputInfo.set_are_values_constant(output.IsConstant());
77 
78  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
79  outputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclOutputInfo));
80 
81  // Set Softmax attributes using descriptor
82  SoftmaxAttributes softmaxAttributes{};
83  softmaxAttributes.beta(descriptor.m_Beta); // Only used for LogSoftmax else default
84  softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
85  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
86  softmaxAttributes.axis(aclAxis);
87 
88  // Validate operator, check status and update reasonIfUnsupported
89  arm_compute::Status aclStatus = GpuSoftmax::validate_op(*sketch,
90  inputTensorInfos[0],
91  outputTensorInfos[0],
92  softmaxAttributes);
93  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
94  if (!supported)
95  {
96  throw BackendCapabilityException("\"GpuFsa\" backend failed during softmax validation");
97  }
98 
99  GpuSoftmax::create_op(*sketch, inputTensorInfos[0], outputTensorInfos[0], softmaxAttributes);
100 
101  // Store the TensorInfos within the blob as unique_ptrs to be used later
102  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
103  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
104 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaSoftmaxValidate()

arm_compute::Status GpuFsaSoftmaxValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaSoftmax.cpp.

25 {
26  // Create a new workload sketch, for validation purposes
27  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
28  auto workloadContext = GpuWorkloadContext(&compileCtx);
29  GpuWorkloadSketch sketch{ &workloadContext };
30 
31  // Build and create tensor infos using the sketch
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
34  aclInputInfo.set_are_values_constant(input.IsConstant());
35  aclOutputInfo.set_are_values_constant(output.IsConstant());
36  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
37  arm_compute::ITensorInfo* outputInfo = workloadContext.create_tensor_info(aclOutputInfo);
38 
39  // Set Softmax attributes using descriptor
40  SoftmaxAttributes softmaxAttributes{};
41  softmaxAttributes.beta(descriptor.m_Beta);
42  softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
43  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
44  softmaxAttributes.axis(aclAxis);
45 
46  // Validate operator, check status and update reasonIfUnsupported
47  arm_compute::Status aclStatus = GpuSoftmax::validate_op(sketch,
48  inputInfo,
49  outputInfo,
50  softmaxAttributes);
51 
52 #ifndef NDEBUG
53  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
54  if (!validated)
55  {
56  std::cout << "GpuFsaSoftmaxValidate failed: " << aclStatus.error_description() << std::endl;
57  }
58 #endif
59 
60  return aclStatus;
61 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaTensorHandleFactoryId()

constexpr const char* armnn::GpuFsaTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file GpuFsaTensorHandleFactory.hpp.

14 { return "Arm/GpuFsa/TensorHandleFactory"; }

Referenced by GpuFsaTensorHandleFactory::GetIdStatic().

◆ HasCapability() [1/4]

bool HasCapability ( const BackendOptions::BackendOption &  backendOption,
const armnn::BackendId &  backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 80 of file BackendHelper.cpp.

81 {
82  return HasMatchingCapability(backendOption, backend);
83 }

References HasMatchingCapability().

◆ HasCapability() [2/4]

bool HasCapability ( const BackendOptions::BackendOption &  capability,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 75 of file BackendHelper.cpp.

76 {
77  return HasMatchingCapability(capability, capabilities);
78 }

References HasMatchingCapability().

◆ HasCapability() [3/4]

bool HasCapability ( const std::string &  name,
const armnn::BackendId &  backend 
)

Convenience function to check if a capability exists in a backend.

Definition at line 70 of file BackendHelper.cpp.

71 {
72  return GetCapability(name, backend).has_value();
73 }

References GetCapability().

◆ HasCapability() [4/4]

bool HasCapability ( const std::string &  name,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a capability exists in a BackendCapabilities struct.

Definition at line 65 of file BackendHelper.cpp.

66 {
67  return GetCapability(name, capabilities).has_value();
68 }

References GetCapability().
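
A short usage sketch for the name-based overloads (declared in armnn/BackendHelper.hpp); the capability name "NonConstWeights" is only an illustrative example:

 armnn::BackendId cpuAcc("CpuAcc");
 if (armnn::HasCapability("NonConstWeights", cpuAcc))
 {
     // The backend declares the capability; its value can then be read via GetCapability().
 }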

◆ HasMatchingCapability() [1/2]

bool HasMatchingCapability ( const BackendOptions::BackendOption &  backendOption,
const armnn::BackendId &  backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 117 of file BackendHelper.cpp.

118 {
119  auto const& backendRegistry = armnn::BackendRegistryInstance();
120  if (backendRegistry.IsBackendRegistered(backend))
121  {
122  auto factoryFunc = backendRegistry.GetFactory(backend);
123  auto backendObject = factoryFunc();
124  auto capabilities = backendObject->GetCapabilities();
125  return HasMatchingCapability(backendOption, capabilities);
126  }
127  return false;
128 }

References BackendRegistryInstance(), and HasMatchingCapability().

◆ HasMatchingCapability() [2/2]

bool HasMatchingCapability ( const BackendOptions::BackendOption &  capability,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 85 of file BackendHelper.cpp.

86 {
87  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
88  {
89  const auto& backendCapability = capabilities.GetOption(i);
90  if (capability.GetName() == backendCapability.GetName())
91  {
92  if (capability.GetValue().IsBool() && backendCapability.GetValue().IsBool())
93  {
94  return capability.GetValue().AsBool() == backendCapability.GetValue().AsBool();
95  }
96  else if (capability.GetValue().IsFloat() && backendCapability.GetValue().IsFloat())
97  {
98  return capability.GetValue().AsFloat() == backendCapability.GetValue().AsFloat();
99  }
100  else if (capability.GetValue().IsInt() && backendCapability.GetValue().IsInt())
101  {
102  return capability.GetValue().AsInt() == backendCapability.GetValue().AsInt();
103  }
104  else if (capability.GetValue().IsString() && backendCapability.GetValue().IsString())
105  {
106  return capability.GetValue().AsString() == backendCapability.GetValue().AsString();
107  }
108  else if (capability.GetValue().IsUnsignedInt() && backendCapability.GetValue().IsUnsignedInt())
109  {
110  return capability.GetValue().AsUnsignedInt() == backendCapability.GetValue().AsUnsignedInt();
111  }
112  }
113  }
114  return false;
115 }

References BackendOptions::Var::AsBool(), BackendOptions::Var::AsFloat(), BackendOptions::Var::AsInt(), BackendOptions::Var::AsString(), BackendOptions::Var::AsUnsignedInt(), BackendOptions::BackendOption::GetName(), BackendOptions::GetOption(), BackendOptions::GetOptionCount(), BackendOptions::BackendOption::GetValue(), BackendOptions::Var::IsBool(), BackendOptions::Var::IsFloat(), BackendOptions::Var::IsInt(), BackendOptions::Var::IsString(), and BackendOptions::Var::IsUnsignedInt().

Referenced by ArmnnDevice::ArmnnDevice(), CheckFp16Support(), HasCapability(), HasMatchingCapability(), LoadedNetwork::ImportInputs(), LoadedNetwork::ImportOutputs(), and RuntimeImpl::RuntimeImpl().
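
For a value-sensitive check, a BackendOption holding the expected value can be matched instead; again the option name and value are only illustrative:

 // Requires armnn/BackendHelper.hpp and armnn/BackendOptions.hpp.
 armnn::BackendOptions::BackendOption expected{ "NonConstWeights", true };
 const bool matches = armnn::HasMatchingCapability(expected, armnn::BackendId("CpuAcc"));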

◆ IgnoreUnused()

void armnn::IgnoreUnused ( Ts &&  ...)
inline

Definition at line 14 of file IgnoreUnused.hpp.

14 {}

Referenced by ClBackendDefaultAllocator::allocate(), DefaultAllocator::allocate(), GpuFsaBackendDefaultAllocator::allocate(), ArgMinMax(), CalculateSlotOptionForOutput(), ITensorHandle::CanBeImported(), ClTensorHandle::CanBeImported(), ClConvolution3dWorkload::ClConvolution3dWorkload(), SpaceToDepthLayer::Clone(), SpaceToBatchNdLayer::Clone(), DynamicBackendUtils::CloseHandle(), ConstTensorPin::ConstTensorPin(), IBackendInternal::CreateExecutionData(), RefTensorHandleFactory::CreateSubTensorHandle(), TosaRefTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicWorkloadFactory::CreateSubTensorHandle(), TosaRefWorkloadFactory::CreateSubTensorHandle(), RefWorkloadFactory::CreateSubTensorHandle(), RefTensorHandleFactory::CreateTensorHandle(), TosaRefTensorHandleFactory::CreateTensorHandle(), SampleDynamicTensorHandleFactory::CreateTensorHandle(), MockTensorHandleFactory::CreateTensorHandle(), ClWorkloadFactory::CreateTensorHandle(), TosaRefWorkloadFactory::CreateTensorHandle(), RefWorkloadFactory::CreateTensorHandle(), ITensorHandleFactory::CreateTensorHandle(), OutputLayer::CreateTensorHandles(), OutputLayer::CreateWorkload(), MergeLayer::CreateWorkload(), UnmapLayer::CreateWorkload(), InputLayer::CreateWorkload(), MapLayer::CreateWorkload(), MemCopyLayer::CreateWorkload(), MemImportLayer::CreateWorkload(), StandInLayer::CreateWorkload(), IBackendInternal::CreateWorkloadFactory(), ITensorHandle::DecorateTensorHandle(), SelectiveQuantizer< T, false >::Dequantize(), SelectiveQuantizer< armnn::Half, false >::Dequantize(), SelectiveQuantizer< armnn::BFloat16, false >::Dequantize(), SerializerStrategy::ExecuteStrategy(), UnmapLayer::ExecuteStrategy(), MapLayer::ExecuteStrategy(), MemImportLayer::ExecuteStrategy(), FakeQuantizationLayer::ExecuteStrategy(), StrategyBase< DefaultStrategy >::ExecuteStrategy(), ExecutionFrame::ExecuteWorkloads(), FalseFunc(), FalseFuncF16(), FalseFuncF32(), FalseFuncI32(), FalseFuncU8(), FalseInputFuncF16(), FalseInputFuncF32(), FalseOutputFuncF16(), FalseOutputFuncF32(), Gather(), ClImportTensorHandleFactory::GetCapabilities(), NeonTensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetCapabilities(), DynamicBackendUtils::GetEntryPoint(), DefaultAllocator::GetMemoryRegionAtOffset(), ClBackendDefaultAllocator::GetMemoryRegionAtOffset(), GpuFsaBackendDefaultAllocator::GetMemoryRegionAtOffset(), ICustomAllocator::GetMemoryRegionAtOffset(), IDeserializer::DeserializerImpl::GetNetworkInputBindingInfo(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), IDeserializer::DeserializerImpl::GetNormalizationDescriptor(), IDeserializer::DeserializerImpl::GetPooling2dDescriptor(), IDeserializer::DeserializerImpl::GetPooling3dDescriptor(), DynamicBackendUtils::GetSharedObjects(), ITensorHandle::Import(), ClTensorHandle::Import(), SliceLayer::InferOutputShapes(), StandInLayer::InferOutputShapes(), ReshapeLayer::InferOutputShapes(), NeonLayerSupport::IsActivationSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchMatMulSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsBroadcastToSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthToSpaceSupported(), 
RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), NeonLayerSupport::IsFillSupported(), ClLayerSupport::IsFillSupported(), RefLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsGatherSupported(), IsGpuFsaBackendSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), TosaRefLayerSupport::IsLayerSupported(), GpuFsaLayerSupport::IsLayerSupported(), ILayerSupport::IsLayerSupported(), ClLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsQLstmSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), ClLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsReshapeSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsScatterNdSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), NeonLayerSupport::IsSplitterSupported(), ClLayerSupport::IsSplitterSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsTileSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), RefLayerSupport::IsUnidirectionalSequenceLstmSupported(), Layer::Layer(), ClImportTensorHandle::Map(), ClBackend::ClBackendCustomAllocatorMemoryRegion::map(), GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::map(), ClImportSubTensorHandle::Map(), NeonConvertFp16ToFp32WorkloadValidate(), NeonConvertFp32ToFp16WorkloadValidate(), NeonConvolution3dWorkload::NeonConvolution3dWorkload(), DynamicBackendUtils::OpenHandle(), SelectiveQuantizer< T, false >::Quantize(), SelectiveQuantizer< armnn::Half, false >::Quantize(), SelectiveQuantizer< armnn::BFloat16, false >::Quantize(), BaseWorkload< SplitterQueueDescriptor >::ReplaceInputTensorHandle(), BaseWorkload< SplitterQueueDescriptor >::ReplaceOutputTensorHandle(), OptimizeInverseConversionsImpl::Run(), OptimizeInversePermutesImpl< PermuteType >::Run(), SquashEqualSiblingsImpl< Comparable >::Run(), FuseBatchNorm< ConvLayer, ArmnnType, T >::Run(), ConvertConstants< Converter, Predicate >::Run(), ClImportTensorHandle::SetMemoryGroup(), ClImportSubTensorHandle::SetMemoryGroup(), OpenClTimer::Start(), MemoryManager::StoreMemToAllocate(), TrueFunc(), ClBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), IBackendInternal::UpdateExecutionData(), ClBackend::UseCustomMemoryAllocator(), IBackendInternal::UseCustomMemoryAllocator(), WorkingMemHandle::WorkingMemHandle(), Graph::LayerInGraph< InputLayer >::~LayerInGraph(), and Graph::LayerInGraph< OutputLayer >::~LayerInGraph().
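
A minimal sketch of the intended use, silencing unused-parameter warnings for arguments that are only needed in some build configurations (include armnn/utility/IgnoreUnused.hpp):

 void Example(int debugLevel, const char* tag)
 {
     // Both parameters are deliberately unused here; IgnoreUnused() is an empty
     // inline variadic function, so the call compiles away entirely.
     armnn::IgnoreUnused(debugLevel, tag);
 }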

◆ IndexToCoordinates()

std::vector<uint32_t> armnn::IndexToCoordinates ( std::vector< uint32_t > &  shape,
uint32_t  index 
)

Definition at line 16 of file Tile.cpp.

17 {
18  std::vector<uint32_t> coordinates;
19  // Iterating through dimensions starting from the last dimension to the first
20  for (std::size_t i = shape.size() - 1; i < shape.size(); --i)
21  {
22  // Coordinate is found by getting the index and modulus it by the current dimension size
23  // shape of dimension = dimension size
24  coordinates.insert(coordinates.begin(), index % shape[i]);
25  // Pass the index to next iteration making index = index / size of the current dimension
26  index = index/shape[i];
27  }
28  return coordinates;
29 }

Referenced by Tile().
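
A worked example, assuming the usual row-major flattening used by Tile(): for shape {2, 3, 4} and flat index 17,

 // 17 % 4 = 1, 17 / 4 = 4   -> last coordinate   1
 //  4 % 3 = 1,  4 / 3 = 1   -> middle coordinate 1
 //  1 % 2 = 1               -> first coordinate  1
 std::vector<uint32_t> shape = { 2, 3, 4 };
 std::vector<uint32_t> coords = armnn::IndexToCoordinates(shape, 17);   // {1, 1, 1}
 // Check: 1*(3*4) + 1*4 + 1 == 17.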

◆ InitializeArmComputeClTensorData()

void InitializeArmComputeClTensorData ( arm_compute::CLTensor &  clTensor,
const ConstTensorHandle *  handle 
)
inline

Definition at line 124 of file ClWorkloadUtils.hpp.

126 {
127  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(handle, "Null tensor handle passed to InitializeArmComputeTensorData.");
128  armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
129  switch(handle->GetTensorInfo().GetDataType())
130  {
131  case DataType::Float16:
132  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
133  break;
134  case DataType::Float32:
135  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
136  break;
137  case DataType::QAsymmU8:
138  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
139  break;
140  case DataType::QAsymmS8:
141  case DataType::QSymmS8:
142  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
143  break;
144  case DataType::QSymmS16:
145  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
146  break;
147  case DataType::Signed32:
148  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
149  break;
150  case DataType::BFloat16:
151  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::BFloat16>());
152  break;
153  default:
154  // Throw exception; assertion not called in release build.
155  throw Exception("Unexpected tensor type during InitializeArmComputeClTensorData().");
156  }
157 };

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE.

◆ InitializeArmComputeTensorData() [1/2]

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
const ConstTensorHandle *  handle 
)
inline

Definition at line 104 of file NeonWorkloadUtils.hpp.

106 {
107  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(handle, "Null tensor handle passed to InitializeArmComputeTensorData.");
108  switch(handle->GetTensorInfo().GetDataType())
109  {
110  case DataType::Float16:
111  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::Half>());
112  break;
113  case DataType::Float32:
114  CopyArmComputeTensorData(tensor, handle->GetConstTensor<float>());
115  break;
116  case DataType::QAsymmU8:
117  CopyArmComputeTensorData(tensor, handle->GetConstTensor<uint8_t>());
118  break;
119  case DataType::QSymmS8:
120  case DataType::QAsymmS8:
121  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int8_t>());
122  break;
123  case DataType::Signed32:
124  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int32_t>());
125  break;
126  case DataType::QSymmS16:
127  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int16_t>());
128  break;
129  case DataType::BFloat16:
130  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::BFloat16>());
131  break;
132  default:
133  // Throw exception; assertion not called in release build.
134  throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
135  }
136 };

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, BFloat16, CopyArmComputeTensorData(), Float16, Float32, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), ConstTensorHandle::GetTensorInfo(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ InitializeArmComputeTensorData() [2/2]

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
TensorInfo  tensorInfo,
const ITensorHandle *  handle 
)
inline

Definition at line 69 of file NeonWorkloadUtils.hpp.

72 {
73  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(handle, "Null tensor handle passed to InitializeArmComputeTensorData.");
74  switch(tensorInfo.GetDataType())
75  {
76  case DataType::Float16:
77  CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::Half*>(handle->Map()));
78  break;
79  case DataType::Float32:
80  CopyArmComputeTensorData(tensor, reinterpret_cast<const float*>(handle->Map()));
81  break;
82  case DataType::QAsymmU8:
83  CopyArmComputeTensorData(tensor, reinterpret_cast<const uint8_t*>(handle->Map()));
84  break;
85  case DataType::QSymmS8:
86  case DataType::QAsymmS8:
87  CopyArmComputeTensorData(tensor, reinterpret_cast<const int8_t*>(handle->Map()));
88  break;
89  case DataType::Signed32:
90  CopyArmComputeTensorData(tensor, reinterpret_cast<const int32_t*>(handle->Map()));
91  break;
92  case DataType::QSymmS16:
93  CopyArmComputeTensorData(tensor, reinterpret_cast<const int16_t*>(handle->Map()));
94  break;
95  case DataType::BFloat16:
96  CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::BFloat16*>(handle->Map()));
97  break;
98  default:
99  // Throw exception; assertion not called in release build.
100  throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
101  }
102 };

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, BFloat16, CopyArmComputeTensorData(), Float16, Float32, TensorInfo::GetDataType(), ITensorHandle::Map(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

Referenced by NeonFullyConnectedWorkload::Execute(), and NeonConvolution2dWorkload::Execute().

◆ InsertConvertFp16ToFp32LayersBefore()

std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore ( Graph &  graph,
Layer &  layer,
bool  expectCorrectInputType 
)

Definition at line 40 of file NetworkUtils.cpp.

43 {
44  std::vector<ConvertFp16ToFp32Layer*> convertLayers;
45  convertLayers.reserve(layer.GetNumInputSlots());
46 
47  // Insert a ConvertFp16ToFp32Layer before each input slot
48  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
49  {
50  bool allowInsert = true;
51  if (expectCorrectInputType)
52  {
53  // Only insert ConvertFp16ToFp32Layer before FP16 input slots
54  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
55  allowInsert =
56  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
57  }
58 
59  if (allowInsert)
60  {
61  const std::string name =
62  std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
63  layer.GetName();
64  ConvertFp16ToFp32Layer* convertLayer =
65  graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
66 
67  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
68  convertInfo.SetDataType(DataType::Float32);
69 
70  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
71 
72  convertLayers.emplace_back(convertLayer);
73  }
74  }
75 
76  return convertLayers;
77 }

References Layer::BeginInputSlots(), Layer::EndInputSlots(), Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().
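
A rough sketch of how an optimization pass might use this helper; 'graph' and 'layer' are assumed to exist already, with the layer's inputs connected to Float16 outputs:

 // Insert a converter in front of every Float16 input slot of 'layer'.
 std::vector<armnn::ConvertFp16ToFp32Layer*> converters =
     armnn::InsertConvertFp16ToFp32LayersBefore(graph, layer, /*expectCorrectInputType=*/true);

 // Keep the new converters on the same backend as the layer they feed (illustrative policy).
 for (auto* converter : converters)
 {
     converter->SetBackendId(layer.GetBackendId());
 }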

◆ InsertConvertFp32ToFp16LayersAfter()

std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter ( Graph &  graph,
Layer &  layer 
)

Definition at line 79 of file NetworkUtils.cpp.

80 {
81  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
82 
83  std::vector<ConvertFp32ToFp16Layer*> convertLayers;
84  convertLayers.reserve(numOutputSlots);
85 
86  // Update FP16 output slots to FP32 on current layer
87  ChangeOutputFp16ToFp32(layer);
88 
89  // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
90  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
91  {
92  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
93  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
94  {
95  const std::string name =
96  std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
97  ConvertFp32ToFp16Layer* convertLayer =
98  graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
99 
100  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
101  convertInfo.SetDataType(DataType::Float16);
102 
103  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
104 
105  convertLayers.emplace_back(convertLayer);
106  }
107  }
108 
109  return convertLayers;
110 }

References Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().

◆ InsertDebugLayerAfter()

std::vector< DebugLayer * > InsertDebugLayerAfter ( Graph &  graph,
Layer &  layer,
bool  toFile 
)

Definition at line 112 of file NetworkUtils.cpp.

113 {
114  std::vector<DebugLayer*> debugLayers;
115  debugLayers.reserve(layer.GetNumOutputSlots());
116 
117  // Connect a DebugLayer to each output slot of the layer
118  uint32_t outputSlotIdx = 0;
119  for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
120  {
121  const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
122  std::to_string(outputSlotIdx);
123 
124  DebugLayer* debugLayer =
125  graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);
126 
127  // Sets output tensor info for the debug layer.
128  if (debugLayer->GetInputSlot(0).GetConnectedOutputSlot() != &(*outputSlot))
129  {
130  throw armnn::Exception("unable to set output tensor info for the debug layer.");
131  }
132 
133  TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
134 
135  debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
136 
137  // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
138  debugLayer->SetBackendId(Compute::CpuRef);
139 
140  debugLayers.emplace_back(debugLayer);
141 
142  ++outputSlotIdx;
143  }
144 
145  return debugLayers;
146 }

References Layer::BeginOutputSlots(), CpuRef, Layer::EndOutputSlots(), InputSlot::GetConnectedOutputSlot(), Layer::GetInputSlot(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AddDebugImpl::Run(), and AddDebugToFileImpl::Run().

◆ InstanceNorm()

void InstanceNorm ( const InstanceNormalizationQueueDescriptor &  data,
const TensorInfo &  inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file InstanceNorm.cpp.

22 {
23  const TensorShape inputShape = inputInfo.GetShape();
24 
25  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
26 
27  unsigned int inputBatches = inputShape[0];
28  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
29  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
30  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
31 
32  float beta = data.m_Parameters.m_Beta;
33  float eps = data.m_Parameters.m_Eps;
34  float gamma = data.m_Parameters.m_Gamma;
35 
36  for (unsigned int n = 0; n < inputBatches; ++n)
37  {
38  for (unsigned int c = 0; c < inputChannels; ++c)
39  {
40  float mean = 0, var = 0;
41 
42  //Calculate Mean
43  for (unsigned int h = 0; h < inputHeight; h++)
44  {
45  for (unsigned int w = 0; w < inputWidth; w++)
46  {
47  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
48 
49  inputDecoder[index];
50  float value = inputDecoder.Get();
51  mean += value;
52  }
53  }
54  mean /= static_cast<float>(inputHeight * inputWidth);
55 
56  //Calculate Variance
57  for (unsigned int h = 0; h < inputHeight; h++)
58  {
59  for (unsigned int w = 0; w < inputWidth; w++)
60  {
61  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
62 
63  inputDecoder[index];
64  float value = inputDecoder.Get();
65  var += (value - mean) * (value - mean);
66  }
67  }
68  var /= static_cast<float>(inputHeight * inputWidth);
69 
70  // Apply Instance Normalisation
71  for (unsigned int h = 0; h < inputHeight; ++h)
72  {
73  for (unsigned int w = 0; w < inputWidth; ++w)
74  {
75  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
76  inputDecoder[index];
77  outputEncoder[index];
78  outputEncoder.Set((inputDecoder.Get() - mean) * gamma / std::sqrt ( var + eps) + beta);
79  }
80 
81  }
82  }
83  }
84 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), InstanceNormalizationDescriptor::m_Beta, InstanceNormalizationDescriptor::m_DataLayout, InstanceNormalizationDescriptor::m_Eps, InstanceNormalizationDescriptor::m_Gamma, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().
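
In other words, for every (batch, channel) plane the function applies, element-wise,

 y = gamma * (x - mean) / sqrt(var + eps) + beta

where mean and var are computed over the H x W elements of that plane, and gamma, beta and eps come from the InstanceNormalizationDescriptor carried in the queue descriptor.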

◆ IntersectionOverUnion()

float IntersectionOverUnion ( const float *  boxI,
const float *  boxJ 
)

Definition at line 29 of file DetectionPostProcess.cpp.

30 {
31  // Box-corner format: ymin, xmin, ymax, xmax.
32  const int yMin = 0;
33  const int xMin = 1;
34  const int yMax = 2;
35  const int xMax = 3;
36  float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
37  float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
38  float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
39  float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
40  float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
41  float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
42  float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
43  std::max(xMaxIntersection - xMinIntersection, 0.0f);
44  float areaUnion = areaI + areaJ - areaIntersection;
45  return areaIntersection / areaUnion;
46 }

Referenced by NonMaxSuppression().
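
A worked example in the box-corner format {ymin, xmin, ymax, xmax}:

 float boxI[4] = { 0.0f, 0.0f, 2.0f, 2.0f };   // area 4
 float boxJ[4] = { 1.0f, 0.0f, 3.0f, 2.0f };   // area 4, shifted down by one unit
 float iou = armnn::IntersectionOverUnion(boxI, boxJ);
 // intersection = 1 * 2 = 2, union = 4 + 4 - 2 = 6, so iou == 2/6 ≈ 0.333f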

◆ IsBFloat16()

bool armnn::IsBFloat16 ( const WorkloadInfo &  info)

Definition at line 56 of file RefWorkloadFactory.cpp.

57 {
58  return IsDataType<DataType::BFloat16>(info);
59 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsBoolean()

bool armnn::IsBoolean ( const WorkloadInfo &  info)

Definition at line 80 of file RefWorkloadFactory.cpp.

81 {
82  return IsDataType<DataType::Boolean>(info);
83 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsDataType()

bool IsDataType ( const WorkloadInfo &  info)

Definition at line 32 of file GpuFsaWorkloadFactory.cpp.

33 {
34  auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
35  auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
36  if (it != std::end(info.m_InputTensorInfos))
37  {
38  return true;
39  }
40  it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
41  if (it != std::end(info.m_OutputTensorInfos))
42  {
43  return true;
44  }
45  return false;
46 }

References info.
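
IsDataType is a small function template over a DataType non-type parameter, defined in the individual backend source files rather than in a public header; the typed wrappers on this page (IsBFloat16, IsBoolean, IsFloat16, ...) are simple instantiations of it. A conceptual sketch only:

 // Inside a workload factory: does any input or output of this workload use Float16?
 bool hasFloat16 = IsDataType<armnn::DataType::Float16>(workloadInfo);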

◆ IsFloat16()

bool armnn::IsFloat16 ( const WorkloadInfo &  info)

Definition at line 60 of file RefWorkloadFactory.cpp.

61 {
62  return IsDataType<DataType::Float16>(info);
63 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsGpuFsaBackendSupported()

bool armnn::IsGpuFsaBackendSupported ( Optional< std::string & >  reasonIfUnsupported,
Args...  args 
)

Definition at line 31 of file GpuFsaLayerSupport.cpp.

32 {
33  IgnoreUnused(reasonIfUnsupported, (args)...);
34 #if defined(ARMCOMPUTEGPUFSA_ENABLED)
35  return true;
36 #else
37  if (reasonIfUnsupported)
38  {
39  reasonIfUnsupported.value() = "The armnn library has been built without CL support";
40  }
41  return false;
42 #endif
43 }

References IgnoreUnused(), and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by GpuFsaLayerSupport::IsLayerSupported().
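
A conceptual sketch of the call pattern (the function lives in GpuFsaLayerSupport.cpp, so it is normally reached through GpuFsaLayerSupport::IsLayerSupported rather than called directly):

 std::string reason;
 armnn::Optional<std::string&> reasonRef(reason);
 if (!IsGpuFsaBackendSupported(reasonRef))
 {
     // 'reason' now explains that the library was built without CL / GpuFsa support.
 }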

◆ IsLayerOptimizable() [1/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer &  layer)

Definition at line 99 of file MockBackend.cpp.

100 {
101  return IsLayerOptimizable(&layer);
102 }

◆ IsLayerOptimizable() [2/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer *  layer)

Definition at line 88 of file MockBackend.cpp.

89 {
90  ARMNN_ASSERT(layer != nullptr);
91 
92  // A Layer is not optimizable if its name contains "unoptimizable"
93  const std::string layerName(layer->GetName());
94  bool optimizable = layerName.find("unoptimizable") == std::string::npos;
95 
96  return optimizable;
97 }

References ARMNN_ASSERT, and Layer::GetName().

◆ IsLayerSequence()

bool armnn::IsLayerSequence ( Layer &  currentLayer,
TYPE  first,
TYPE  second,
TYPE  third,
Layer *  layerList[4],
bool  handleValidActivates,
const std::vector< ActivationFunction > &  validActivates 
)

Definition at line 375 of file SubgraphUtils.hpp.

382 {
383  auto PreviousLayer = [](Layer& layer)
384  {
385  return &layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
386  };
387 
388  auto NextLayer = [](Layer& layer)
389  {
390  return &layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer();
391  };
392 
393  auto LayerIncomingConnectionDataType = [](Layer& layer)
394  {
395  return layer.GetInputSlot(0).GetTensorInfo().GetDataType();
396  };
397 
398  bool result = false;
399 
400  // Match in reverse so there is only 1 connection to check
401  if (IsSequenceLayerType(currentLayer, third))
402  {
403  // Save DataType of third layer
404  DataType dataType = LayerIncomingConnectionDataType(currentLayer);
405 
406  // Save third layer
407  layerList[2] = &currentLayer;
408 
409  // Check the layers that precede this one for the requested grouping
410  Layer *prevLayer = PreviousLayer(currentLayer);
411  if (prevLayer && IsSequenceLayerType(*prevLayer, second))
412  {
413  bool dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
414  if (! dataTypesMatch)
415  {
416  return result;
417  }
418 
419  layerList[1] = prevLayer;
420  prevLayer = PreviousLayer(*prevLayer);
421  if (prevLayer && IsSequenceLayerType(*prevLayer, first))
422  {
423  dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
424  if (! dataTypesMatch)
425  {
426  return result;
427  }
428 
429  layerList[0] = prevLayer;
430 
431  // Detected the first 3 layers if we get to this point so now
432  // check to see if we have a valid activation. If there is no activation
433  // then the sequence still matches.
434  if (handleValidActivates)
435  {
436  Layer *nextLayer = NextLayer(currentLayer);
437  if (nextLayer)
438  {
439  if (IsSequenceLayerType(*nextLayer, LayerType::Activation))
440  {
441  // This layer is an activation, so it must be a valid type for the sequence
442  ActivationFunction activationFunction =
443  PolymorphicDowncast<ActivationLayer*>(nextLayer)->GetParameters().m_Function;
444  long count = std::count(validActivates.cbegin(),
445  validActivates.cend(),
446  activationFunction);
447  if (count > 0)
448  {
449  layerList[3] = nextLayer;
450  result = true;
451  }
452  }
453  else
454  {
455  // Next layer is not an activation so sequence still matches
456  result = true;
457  }
458  }
459  }
460  else
461  {
462  result = true;
463  }
464  }
465  }
466  }
467 
468  return result;
469 }
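
A hedged usage sketch: the layer types, the activation list and the variable 'candidate' below are illustrative only, and TYPE can be anything IsSequenceLayerType() accepts (for example LayerType):

 armnn::Layer* layerList[4] = { nullptr, nullptr, nullptr, nullptr };
 std::vector<armnn::ActivationFunction> validActivates =
     { armnn::ActivationFunction::ReLu, armnn::ActivationFunction::BoundedReLu };

 // 'candidate' is the last (third) layer of the suspected chain.
 bool found = armnn::IsLayerSequence(candidate,
                                     armnn::LayerType::ElementwiseBinary,
                                     armnn::LayerType::ElementwiseBinary,
                                     armnn::LayerType::ElementwiseBinary,
                                     layerList,
                                     true,            // handleValidActivates
                                     validActivates);
 // On success layerList[0..2] hold the matched chain and layerList[3] the trailing activation, if any.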

◆ IsLayerSupported() [1/2]

bool armnn::IsLayerSupported ( const armnn::Layer &  layer)

Definition at line 83 of file MockBackend.cpp.

84 {
85  return IsLayerSupported(&layer);
86 }

◆ IsLayerSupported() [2/2]

bool armnn::IsLayerSupported ( const armnn::Layer *  layer)

Definition at line 62 of file MockBackend.cpp.

63 {
64  ARMNN_ASSERT(layer != nullptr);
65 
66  armnn::LayerType layerType = layer->GetType();
67  switch (layerType)
68  {
69  case armnn::LayerType::Input:
70  case armnn::LayerType::Output:
71  case armnn::LayerType::Constant:
72  case armnn::LayerType::Addition:
73  case armnn::LayerType::ElementwiseBinary:
74  case armnn::LayerType::Convolution2d:
75  // Layer supported
76  return true;
77  default:
78  // Layer unsupported
79  return false;
80  }
81 }

References Addition, ARMNN_ASSERT, Constant, Convolution2d, ElementwiseBinary, Layer::GetType(), Input, and Output.

Referenced by SampleDynamicWorkloadFactory::IsLayerSupported().

◆ IsLayerTypeSupported()

bool armnn::IsLayerTypeSupported ( const LayerType &  type,
const std::vector< TensorInfo > &  infos,
const BaseDescriptor &  descriptor,
const Optional< LstmInputParamsInfo > &  lstmParamsInfo,
const Optional< QuantizedLstmInputParamsInfo > &  quantizedLstmParamsInfo,
Optional< std::string & >  reasonIfUnsupported,
const NeonLayerSupport &  support 
)

Definition at line 184 of file NeonLayerSupport.cpp.

191 {
192  switch (type)
193  {
194  case LayerType::Activation:
195  return support.IsActivationSupported(infos[0],
196  infos[1],
197  *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)),
198  reasonIfUnsupported);
199  case LayerType::Addition:
200  return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
201  case LayerType::ArgMinMax:
202  return support.IsArgMinMaxSupported(infos[0],
203  infos[1],
204  *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
205  reasonIfUnsupported);
206  case LayerType::BatchMatMul:
207  return support.IsBatchMatMulSupported(infos[0],
208  infos[1],
209  infos[2],
210  *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
211  reasonIfUnsupported);
212  case LayerType::BatchNormalization:
213  return support.IsBatchNormalizationSupported(infos[0],
214  infos[1],
215  infos[2],
216  infos[3],
217  infos[4],
218  infos[5],
219  *(PolymorphicDowncast<const
220  BatchNormalizationDescriptor*>(&descriptor)),
221  reasonIfUnsupported);
222  case LayerType::BatchToSpaceNd:
223  return support.IsBatchToSpaceNdSupported(infos[0],
224  infos[1],
225  *(PolymorphicDowncast<const
226  BatchToSpaceNdDescriptor*>(&descriptor)),
227  reasonIfUnsupported);
228  case LayerType::Cast:
229  return support.IsCastSupported(infos[0], infos[1], reasonIfUnsupported);
230  case LayerType::ChannelShuffle:
231  return support.IsChannelShuffleSupported(infos[0],
232  infos[1],
233  *(PolymorphicDowncast<const
234  ChannelShuffleDescriptor*>(&descriptor)),
235  reasonIfUnsupported);
236  case LayerType::Comparison:
237  return support.IsComparisonSupported(infos[0],
238  infos[1],
239  infos[2],
240  *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)),
241  reasonIfUnsupported);
242  case LayerType::Concat:
243  {
244  std::vector<const TensorInfo*> inputInfos;
245  for (uint32_t i = 0; i < (infos.size() - 1); i++)
246  {
247  inputInfos.push_back(&infos[i]);
248  }
249  return support.IsConcatSupported(inputInfos,
250  infos[infos.size() - 1],
251  *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)),
252  reasonIfUnsupported);
253  }
254  case LayerType::Constant:
255  return support.IsConstantSupported(infos[0], reasonIfUnsupported);
256  case LayerType::ConvertFp16ToFp32:
257  return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported);
258  case LayerType::ConvertFp32ToFp16:
259  return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported);
260  case LayerType::Convolution2d:
261  {
262  if (infos.size() != 4)
263  {
264  throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
265  "TensorInfos should be of format: {input, output, weights, biases}.");
266  }
267 
268  auto desc = *(PolymorphicDowncast<const Convolution2dDescriptor*>(&descriptor));
269  if (infos[3] == TensorInfo())
270  {
271  return support.IsConvolution2dSupported(infos[0],
272  infos[1],
273  desc,
274  infos[2],
275  EmptyOptional(),
276  reasonIfUnsupported);
277  }
278  else
279  {
280  return support.IsConvolution2dSupported(infos[0],
281  infos[1],
282  desc,
283  infos[2],
284  infos[3],
285  reasonIfUnsupported);
286  }
287  }
288  case LayerType::Convolution3d:
289  {
290  if (infos.size() != 4)
291  {
292  throw InvalidArgumentException("Invalid number of Convolution3d TensorInfos. "
293  "TensorInfos should be of format: {input, output, weights, biases}.");
294  }
295 
296  auto desc = *(PolymorphicDowncast<const Convolution3dDescriptor*>(&descriptor));
297  if (infos[3] == TensorInfo())
298  {
299  return support.IsConvolution3dSupported(infos[0],
300  infos[1],
301  desc,
302  infos[2],
303  EmptyOptional(),
304  reasonIfUnsupported);
305  }
306  else
307  {
308  return support.IsConvolution3dSupported(infos[0],
309  infos[1],
310  desc,
311  infos[2],
312  infos[3],
313  reasonIfUnsupported);
314  }
315  }
316  case LayerType::DepthToSpace:
317  return support.IsDepthToSpaceSupported(infos[0],
318  infos[1],
319  *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)),
320  reasonIfUnsupported);
321  case LayerType::DepthwiseConvolution2d:
322  {
323  if (infos.size() != 4)
324  {
325  throw InvalidArgumentException("Invalid number of DepthwiseConvolution2d TensorInfos. "
326  "TensorInfos should be of format: {input, output, weights, biases}.");
327  }
328 
329  auto desc = *(PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor));
330  if (infos[3] == TensorInfo())
331  {
332  return support.IsDepthwiseConvolutionSupported(infos[0],
333  infos[1],
334  desc,
335  infos[2],
336  EmptyOptional(),
337  reasonIfUnsupported);
338  }
339  else
340  {
341  return support.IsDepthwiseConvolutionSupported(infos[0],
342  infos[1],
343  desc,
344  infos[2],
345  infos[3],
346  reasonIfUnsupported);
347  }
348  }
349  case LayerType::Dequantize:
350  return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
351  case LayerType::DetectionPostProcess:
352  {
353  auto desc = *(PolymorphicDowncast<const DetectionPostProcessDescriptor*>(&descriptor));
354  return support.IsDetectionPostProcessSupported(infos[0],
355  infos[1],
356  infos[2],
357  infos[3],
358  infos[4],
359  infos[5],
360  infos[6],
361  desc,
362  reasonIfUnsupported);
363  }
364  case LayerType::Division:
365  return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
366  case LayerType::ElementwiseBinary:
367  {
368  auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
369 
370  switch (desc.m_Operation)
371  {
372  case BinaryOperation::Add:
373  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
374  reasonIfUnsupported,
375  infos[0],
376  infos[1],
377  infos[2],
378  nullptr);
379  case BinaryOperation::Div:
380  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate,
381  reasonIfUnsupported,
382  infos[0],
383  infos[1],
384  infos[2],
385  nullptr);
386  case BinaryOperation::Maximum:
387  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
388  reasonIfUnsupported,
389  infos[0],
390  infos[1],
391  infos[2]);
392  case BinaryOperation::Minimum:
393  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
394  reasonIfUnsupported,
395  infos[0],
396  infos[1],
397  infos[2]);
398  case BinaryOperation::Mul:
399  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
400  reasonIfUnsupported,
401  infos[0],
402  infos[1],
403  infos[2],
404  nullptr);
405  case BinaryOperation::Power:
406  case BinaryOperation::SqDiff:
407  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonElementwiseBinaryWorkloadValidate,
408  reasonIfUnsupported,
409  infos[0],
410  infos[1],
411  infos[2],
412  desc,
413  nullptr);
414  case BinaryOperation::Sub:
415  FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
416  reasonIfUnsupported,
417  infos[0],
418  infos[1],
419  infos[2],
420  nullptr);
421  default:
422  return false;
423  }
424  }
425  case LayerType::ElementwiseUnary:
426  return support.IsElementwiseUnarySupported(infos[0],
427  infos[1],
428  *(PolymorphicDowncast<const
429  ElementwiseUnaryDescriptor*>(&descriptor)),
430  reasonIfUnsupported);
431  case LayerType::Fill:
432  return support.IsFillSupported(infos[0],
433  infos[1],
434  *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)),
435  reasonIfUnsupported);
436  case LayerType::Floor:
437  return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported);
438  case LayerType::FullyConnected:
439  return support.IsFullyConnectedSupported(infos[0],
440  infos[1],
441  infos[2],
442  infos[3],
443  *(PolymorphicDowncast<const
444  FullyConnectedDescriptor*>(&descriptor)),
445  reasonIfUnsupported);
446  case LayerType::Fused:
447  {
448  auto fusedDescriptor = *(PolymorphicDowncast<const FusedDescriptor*>(&descriptor));
449  if (fusedDescriptor.m_NumInputSlots + fusedDescriptor.m_NumOutputSlots != infos.size())
450  {
451  throw InvalidArgumentException("Invalid number of FusedLayer TensorInfos.");
452  }
453 
454  auto it = infos.begin() + numeric_cast<TensorInfo::DifferenceType>(fusedDescriptor.m_NumInputSlots);
455  std::vector<TensorInfo> inputInfos(infos.begin(), it);
456  std::vector<TensorInfo> outputInfos(it, infos.end());
457 
458  return support.IsFusedSupported({inputInfos.begin(), inputInfos.end()},
459  {outputInfos.begin(), outputInfos.end()},
460  fusedDescriptor,
461  reasonIfUnsupported);
462  }
463  case LayerType::Gather:
464  return support.IsGatherSupported(infos[0],
465  infos[1],
466  infos[2],
467  *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)),
468  reasonIfUnsupported);
469  case LayerType::GatherNd:
470  return support.IsGatherNdSupported(infos[0],
471  infos[1],
472  infos[2],
473  reasonIfUnsupported);
474  case LayerType::Input:
475  return support.IsInputSupported(infos[0], reasonIfUnsupported);
476  case LayerType::InstanceNormalization:
477  return support.IsInstanceNormalizationSupported(infos[0],
478  infos[1],
479  *(PolymorphicDowncast<const
480  InstanceNormalizationDescriptor*>(&descriptor)),
481  reasonIfUnsupported);
482  case LayerType::L2Normalization:
483  return support.IsL2NormalizationSupported(infos[0],
484  infos[1],
485  *(PolymorphicDowncast<const
486  L2NormalizationDescriptor*>(&descriptor)),
487  reasonIfUnsupported);
488  case LayerType::LogicalBinary:
489  return support.IsLogicalBinarySupported(infos[0],
490  infos[1],
491  infos[2],
492  *(PolymorphicDowncast<const
493  LogicalBinaryDescriptor*>(&descriptor)),
494  reasonIfUnsupported);
495  case LayerType::LogSoftmax:
496  return support.IsLogSoftmaxSupported(infos[0],
497  infos[1],
498  *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)),
499  reasonIfUnsupported);
500  case LayerType::Lstm:
501  return support.IsLstmSupported(infos[0],
502  infos[1],
503  infos[2],
504  infos[3],
505  infos[4],
506  infos[5],
507  infos[6],
508  *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)),
509  lstmParamsInfo.value(),
510  reasonIfUnsupported);
511  case LayerType::Map:
512  return true;
513  case LayerType::Maximum:
514  return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
515  case LayerType::Mean:
516  return support.IsMeanSupported(infos[0],
517  infos[1],
518  *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)),
519  reasonIfUnsupported);
520  case LayerType::MemCopy:
521  return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported);
522  case LayerType::MemImport:
523  return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported);
524  case LayerType::Merge:
525  return support.IsMergeSupported(infos[0],
526  infos[1],
527  infos[2],
528  reasonIfUnsupported);
529  case LayerType::Minimum:
530  return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
531  case LayerType::Multiplication:
532  return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
533  case LayerType::Normalization:
534  return support.IsNormalizationSupported(infos[0],
535  infos[1],
536  *(PolymorphicDowncast<const
537  NormalizationDescriptor*>(&descriptor)),
538  reasonIfUnsupported);
539  case LayerType::Output:
540  return support.IsOutputSupported(infos[0], reasonIfUnsupported);
541  case LayerType::Pad:
542  return support.IsPadSupported(infos[0],
543  infos[1],
544  *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)),
545  reasonIfUnsupported);
546  case LayerType::Permute:
547  return support.IsPermuteSupported(infos[0],
548  infos[1],
549  *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)),
550  reasonIfUnsupported);
551  case LayerType::Pooling2d:
552  return support.IsPooling2dSupported(infos[0],
553  infos[1],
554  *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
555  reasonIfUnsupported);
556  case LayerType::Pooling3d:
557  return support.IsPooling3dSupported(infos[0],
558  infos[1],
559  *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
560  reasonIfUnsupported);
561  case LayerType::Prelu:
562  return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
563  case LayerType::QLstm:
564  return support.IsQLstmSupported(infos[0],
565  infos[1],
566  infos[2],
567  infos[3],
568  infos[4],
569  infos[5],
570  *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)),
571  lstmParamsInfo.value(),
572  reasonIfUnsupported);
573  case LayerType::Quantize:
574  return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported);
575  case LayerType::QuantizedLstm:
576  return support.IsQuantizedLstmSupported(infos[0],
577  infos[1],
578  infos[2],
579  infos[3],
580  infos[4],
581  quantizedLstmParamsInfo.value(),
582  reasonIfUnsupported);
583  case LayerType::Rank:
584  return true;
585  case LayerType::Reshape:
586  return support.IsReshapeSupported(infos[0],
587  infos[1],
588  *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)),
589  reasonIfUnsupported);
590  case LayerType::Resize:
591  return support.IsResizeSupported(infos[0],
592  infos[1],
593  *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
594  reasonIfUnsupported);
595  case LayerType::Reduce:
596  return support.IsReduceSupported(infos[0],
597  infos[1],
598  *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
599  reasonIfUnsupported);
600  case LayerType::ReverseV2:
601  return support.IsReverseV2Supported(infos[0],
602  infos[1],
603  infos[2],
604  reasonIfUnsupported);
605  case LayerType::Shape:
606  return support.IsShapeSupported(infos[0],
607  infos[1],
608  reasonIfUnsupported);
609  case LayerType::Slice:
610  return support.IsSliceSupported(infos[0],
611  infos[1],
612  *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)),
613  reasonIfUnsupported);
614  case LayerType::Softmax:
615  return support.IsSoftmaxSupported(infos[0],
616  infos[1],
617  *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)),
618  reasonIfUnsupported);
619  case LayerType::SpaceToBatchNd:
620  return support.IsSpaceToBatchNdSupported(infos[0],
621  infos[1],
622  *(PolymorphicDowncast<const
623  SpaceToBatchNdDescriptor*>(&descriptor)),
624  reasonIfUnsupported);
625  case LayerType::SpaceToDepth:
626  return support.IsSpaceToDepthSupported(infos[0],
627  infos[1],
628  *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)),
629  reasonIfUnsupported);
630  case LayerType::Splitter:
631  {
632  std::vector<TensorInfo> outputInfos;
633  for (uint32_t i = 1; i < infos.size(); i++)
634  {
635  outputInfos.push_back(infos[i]);
636  }
637  return support.IsSplitterSupported(infos[0],
638  {outputInfos.begin(), outputInfos.end()},
639  *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)),
640  reasonIfUnsupported);
641  }
642  case LayerType::Stack:
643  {
644  std::vector<const TensorInfo*> inputInfos;
645  for (uint32_t i = 0; i < infos.size() - 1; i++)
646  {
647  inputInfos.push_back(&infos[i]);
648  }
649  return support.IsStackSupported(inputInfos,
650  infos[infos.size() - 1],
651  *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)),
652  reasonIfUnsupported);
653  }
654  case LayerType::StridedSlice:
655  return support.IsStridedSliceSupported(infos[0],
656  infos[1],
657  *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)),
658  reasonIfUnsupported);
659  case LayerType::Subtraction:
660  return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
661  case LayerType::Tile:
662  return support.IsTileSupported(infos[0],
663  infos[1],
664  *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)),
665  reasonIfUnsupported);
666  case LayerType::Transpose:
667  return support.IsTransposeSupported(infos[0],
668  infos[1],
669  *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)),
670  reasonIfUnsupported);
671  case LayerType::TransposeConvolution2d:
672  {
673  if (infos.size() != 4)
674  {
675  throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
676  "TensorInfos should be of format: {input, output, weights, biases}.");
677  }
678 
679  auto desc = *(PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor));
680  if (infos[3] == TensorInfo())
681  {
682  return support.IsTransposeConvolution2dSupported(infos[0],
683  infos[1],
684  desc,
685  infos[2],
686  EmptyOptional(),
687  reasonIfUnsupported);
688  }
689  else
690  {
691  return support.IsTransposeConvolution2dSupported(infos[0],
692  infos[1],
693  desc,
694  infos[2],
695  infos[3],
696  reasonIfUnsupported);
697  }
698  }
699  case LayerType::UnidirectionalSequenceLstm:
700  {
701  auto desc = *(PolymorphicDowncast<const UnidirectionalSequenceLstmDescriptor*>(&descriptor));
702  return support.IsUnidirectionalSequenceLstmSupported(infos[0],
703  infos[1],
704  infos[2],
705  infos[3],
706  infos[4],
707  infos[5],
708  desc,
709  lstmParamsInfo.value(),
710  reasonIfUnsupported);
711  }
712  case LayerType::Unmap:
713  return true;
714  default:
715  // layers not supported in neon by default:
716  // debug, fakequantization, precompiled,
717  // standin, switch
718  return false;
719  }
720 }

References Activation, Add, Addition, ArgMinMax, BatchMatMul, BatchNormalization, BatchToSpaceNd, Cast, ChannelShuffle, Comparison, Concat, Constant, ConvertFp16ToFp32, ConvertFp32ToFp16, Convolution2d, Convolution3d, DepthToSpace, DepthwiseConvolution2d, Dequantize, DetectionPostProcess, Div, Division, ElementwiseBinary, ElementwiseUnary, Fill, Floor, FORWARD_WORKLOAD_VALIDATE_FUNC, FullyConnected, Fused, Gather, GatherNd, Input, InstanceNormalization, NeonLayerSupport::IsActivationSupported(), NeonLayerSupport::IsAdditionSupported(), NeonLayerSupport::IsArgMinMaxSupported(), NeonLayerSupport::IsBatchMatMulSupported(), NeonLayerSupport::IsBatchNormalizationSupported(), NeonLayerSupport::IsBatchToSpaceNdSupported(), NeonLayerSupport::IsCastSupported(), NeonLayerSupport::IsChannelShuffleSupported(), NeonLayerSupport::IsComparisonSupported(), NeonLayerSupport::IsConcatSupported(), NeonLayerSupport::IsConstantSupported(), NeonLayerSupport::IsConvertFp16ToFp32Supported(), NeonLayerSupport::IsConvertFp32ToFp16Supported(), NeonLayerSupport::IsConvolution2dSupported(), NeonLayerSupport::IsConvolution3dSupported(), NeonLayerSupport::IsDepthToSpaceSupported(), NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDequantizeSupported(), LayerSupportBase::IsDetectionPostProcessSupported(), NeonLayerSupport::IsDivisionSupported(), NeonLayerSupport::IsElementwiseUnarySupported(), NeonLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), NeonLayerSupport::IsFullyConnectedSupported(), NeonLayerSupport::IsFusedSupported(), NeonLayerSupport::IsGatherNdSupported(), NeonLayerSupport::IsGatherSupported(), NeonLayerSupport::IsInputSupported(), NeonLayerSupport::IsInstanceNormalizationSupported(), NeonLayerSupport::IsL2NormalizationSupported(), NeonLayerSupport::IsLogicalBinarySupported(), NeonLayerSupport::IsLogSoftmaxSupported(), NeonLayerSupport::IsLstmSupported(), NeonLayerSupport::IsMaximumSupported(), NeonLayerSupport::IsMeanSupported(), LayerSupportBase::IsMemCopySupported(), LayerSupportBase::IsMemImportSupported(), LayerSupportBase::IsMergeSupported(), NeonLayerSupport::IsMinimumSupported(), NeonLayerSupport::IsMultiplicationSupported(), NeonLayerSupport::IsNormalizationSupported(), NeonLayerSupport::IsOutputSupported(), NeonLayerSupport::IsPadSupported(), NeonLayerSupport::IsPermuteSupported(), NeonLayerSupport::IsPooling2dSupported(), NeonLayerSupport::IsPooling3dSupported(), NeonLayerSupport::IsPreluSupported(), NeonLayerSupport::IsQLstmSupported(), NeonLayerSupport::IsQuantizedLstmSupported(), NeonLayerSupport::IsQuantizeSupported(), NeonLayerSupport::IsReduceSupported(), NeonLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsResizeSupported(), NeonLayerSupport::IsReverseV2Supported(), LayerSupportBase::IsShapeSupported(), NeonLayerSupport::IsSliceSupported(), NeonLayerSupport::IsSoftmaxSupported(), NeonLayerSupport::IsSpaceToBatchNdSupported(), NeonLayerSupport::IsSpaceToDepthSupported(), NeonLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsStackSupported(), NeonLayerSupport::IsStridedSliceSupported(), NeonLayerSupport::IsSubtractionSupported(), NeonLayerSupport::IsTileSupported(), NeonLayerSupport::IsTransposeConvolution2dSupported(), NeonLayerSupport::IsTransposeSupported(), NeonLayerSupport::IsUnidirectionalSequenceLstmSupported(), L2Normalization, LogicalBinary, LogSoftmax, Lstm, Map, Maximum, Mean, MemCopy, MemImport, Merge, Minimum, Mul, Multiplication, NeonAdditionWorkloadValidate(), NeonDivisionWorkloadValidate(), 
NeonElementwiseBinaryWorkloadValidate(), NeonMaximumWorkloadValidate(), NeonMinimumWorkloadValidate(), NeonMultiplicationWorkloadValidate(), NeonSubtractionWorkloadValidate(), Normalization, Output, Pad, Permute, PolymorphicDowncast(), Pooling2d, Pooling3d, Power, Prelu, QLstm, Quantize, QuantizedLstm, Rank, Reduce, Reshape, Resize, ReverseV2, Shape, Slice, Softmax, SpaceToBatchNd, SpaceToDepth, Splitter, SqDiff, Stack, StridedSlice, Sub, Subtraction, Tile, Transpose, TransposeConvolution2d, UnidirectionalSequenceLstm, Unmap, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by NeonLayerSupport::IsLayerSupported().
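
A minimal usage sketch (ours, not taken from the ArmNN sources): querying Neon support for a Softmax layer through NeonLayerSupport, which forwards to the switch above. The argument order assumes the ILayerSupport::IsLayerSupported overload taking a LayerType, the collected TensorInfos and a BaseDescriptor.

armnn::NeonLayerSupport support;
armnn::TensorInfo input(armnn::TensorShape({1, 10}), armnn::DataType::Float32);
armnn::TensorInfo output(armnn::TensorShape({1, 10}), armnn::DataType::Float32);
armnn::SoftmaxDescriptor softmaxDesc;
std::string reason;

// The infos vector follows the {input, output} layout expected by the Softmax case above.
bool supported = support.IsLayerSupported(armnn::LayerType::Softmax,
                                          {input, output},
                                          softmaxDesc,
                                          armnn::EmptyOptional(),   // no LSTM parameters
                                          armnn::EmptyOptional(),   // no QuantizedLstm parameters
                                          armnn::Optional<std::string&>(reason));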

◆ IsNCHW()

bool armnn::IsNCHW ( armnn::Layer &  layer)
inline

Definition at line 213 of file SubgraphUtils.hpp.

214 {
215  CheckForNCHW check;
216  layer.ExecuteStrategy(check);
217  return check.Result();
218 }

References Layer::ExecuteStrategy().

Referenced by ConnectedToLayerWithNCHW().
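
A short illustrative sketch (ours): layer is assumed to be an armnn::Layer reference obtained while walking the graph being optimised.

void SkipNchwOnlyRewrites(armnn::Layer& layer)
{
    if (armnn::IsNCHW(layer))
    {
        // The layer's descriptor reports DataLayout::NCHW, so rewrites that
        // only apply to NHWC layouts should not be attempted on it.
        return;
    }
    // ... NHWC-specific handling ...
}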

◆ IsOperationQueueDescriptor() [1/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const ConstantQueueDescriptor &  )
constexpr

Definition at line 22 of file RefWorkloadFactory.hpp.

22 { return false; }

◆ IsOperationQueueDescriptor() [2/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const MemCopyQueueDescriptor &  )
constexpr

Definition at line 20 of file RefWorkloadFactory.hpp.

20 { return false; }

◆ IsOperationQueueDescriptor() [3/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const PermuteQueueDescriptor &  )
constexpr

Definition at line 24 of file RefWorkloadFactory.hpp.

24 { return false; }

◆ IsOperationQueueDescriptor() [4/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const QueueDescriptorType &  )
constexpr

Definition at line 18 of file RefWorkloadFactory.hpp.

18 { return true; }

◆ IsQAsymmS8()

bool armnn::IsQAsymmS8 ( const WorkloadInfo &  info)

Definition at line 72 of file RefWorkloadFactory.cpp.

73 {
74  return IsDataType<DataType::QAsymmS8>(info);
75 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQAsymmU8()

bool armnn::IsQAsymmU8 ( const WorkloadInfo &  info)

Definition at line 76 of file RefWorkloadFactory.cpp.

77 {
78  return IsDataType<DataType::QAsymmU8>(info);
79 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS16()

bool armnn::IsQSymmS16 ( const WorkloadInfo &  info)

Definition at line 64 of file RefWorkloadFactory.cpp.

65 {
66  return IsDataType<DataType::QSymmS16>(info);
67 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS8()

bool armnn::IsQSymmS8 ( const WorkloadInfo &  info)

Definition at line 68 of file RefWorkloadFactory.cpp.

69 {
70  return IsDataType<DataType::QSymmS8>(info);
71 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQuantized8BitType()

constexpr bool armnn::IsQuantized8BitType ( DataType  dataType)
constexpr

Definition at line 316 of file TypesUtils.hpp.

317 {
318  return dataType == DataType::QAsymmU8 ||
319  dataType == DataType::QAsymmS8 ||
320  dataType == DataType::QSymmS8;
321 }

References QAsymmS8, QAsymmU8, and QSymmS8.

Referenced by ConvertSoftmaxToTosaOperator(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), IsQuantizedType(), and RefLayerSupport::IsTransposeConvolution2dSupported().
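
A minimal sketch (ours) of the typical use: gating an 8-bit quantized code path on the tensor's data type.

armnn::TensorInfo weightsInfo(armnn::TensorShape({16, 3, 3, 3}),
                              armnn::DataType::QAsymmS8,
                              0.05f,  // quantization scale
                              0);     // quantization offset
if (armnn::IsQuantized8BitType(weightsInfo.GetDataType()))
{
    // QAsymmU8, QAsymmS8 or QSymmS8 weights: take the 8-bit quantized kernel path.
}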

◆ IsQuantizedType() [1/2]

constexpr bool armnn::IsQuantizedType ( )
constexpr

◆ IsQuantizedType() [2/2]

constexpr bool armnn::IsQuantizedType ( DataType  dataType)
constexpr

Definition at line 323 of file TypesUtils.hpp.

324 {
325  return dataType == DataType::QSymmS16 || IsQuantized8BitType(dataType);
326 }

References IsQuantized8BitType(), and QSymmS16.

◆ IsReadyForSplitAssignment()

bool armnn::IsReadyForSplitAssignment ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 387 of file SubgraphViewSelector.cpp.

388 {
389  bool ready = true;
390  ForEachLayerInput(layerInfos, layerInfo,
391  [&ready](LayerSelectionInfo& parentInfo)
392  {
393  if (!parentInfo.m_IsProcessed)
394  {
395  ready = false;
396  }
397  });
398  return ready;
399 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ IsSequenceLayerType() [1/2]

bool armnn::IsSequenceLayerType ( Layer &  layer,
BinaryOperation  type 
)
inline

Definition at line 367 of file SubgraphUtils.hpp.

368 {
369  return (layer.GetType() == LayerType::ElementwiseBinary) &&
370  (PolymorphicDowncast<ElementwiseBinaryLayer*>(&layer)->GetParameters().m_Operation == type);
371 }

References ElementwiseBinary, and Layer::GetType().

◆ IsSequenceLayerType() [2/2]

bool armnn::IsSequenceLayerType ( Layer &  layer,
LayerType  type 
)
inline

Definition at line 362 of file SubgraphUtils.hpp.

363 {
364  return layer.GetType() == type;
365 }

References Layer::GetType().

Referenced by BuildAddMulAddTensorInfoLists().
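
A brief sketch (ours) of how the two overloads are typically combined when matching a layer sequence; first and second are assumed to be armnn::Layer references taken from a graph walk.

bool addThenMul = armnn::IsSequenceLayerType(first, armnn::BinaryOperation::Add) &&
                  armnn::IsSequenceLayerType(second, armnn::BinaryOperation::Mul);
if (addThenMul)
{
    // Candidate for the Add/Mul fusions prepared by BuildAddMulAddTensorInfoLists().
}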

◆ IsSigned32()

bool armnn::IsSigned32 ( const WorkloadInfo &  info)

Definition at line 52 of file RefWorkloadFactory.cpp.

53 {
54  return IsDataType<DataType::Signed32>(info);
55 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSigned64()

bool armnn::IsSigned64 ( const WorkloadInfo &  info)

Definition at line 48 of file RefWorkloadFactory.cpp.

49 {
50  return IsDataType<DataType::Signed64>(info);
51 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSupportedForDataTypeGeneric()

bool armnn::IsSupportedForDataTypeGeneric ( Optional< std::string & >  reasonIfUnsupported,
DataType  dataType,
Float16Func  float16FuncPtr,
Float32Func  float32FuncPtr,
Uint8Func  uint8FuncPtr,
Int32Func  int32FuncPtr,
BooleanFunc  booleanFuncPtr,
Params &&...  params 
)

Definition at line 27 of file LayerSupportCommon.hpp.

35 {
36  switch(dataType)
37  {
38  case DataType::Float16:
39  return float16FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
40  case DataType::Float32:
41  return float32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
42  case DataType::QAsymmU8:
43  return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
44  case DataType::Signed32:
45  return int32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
46  case DataType::Boolean:
47  return booleanFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
48  default:
49  return false;
50  }
51 }

References Boolean, Float16, Float32, QAsymmU8, and Signed32.

Referenced by RefLayerSupport::IsConvertFp16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToFp16Supported(), and NeonLayerSupport::IsFloorSupported().
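
A sketch (ours) of how a backend predicate can be assembled from this dispatcher. TrueFunc/FalseFunc are assumed to be the small helper templates declared alongside it in LayerSupportCommon.hpp.

bool SupportsFloat32Only(armnn::Optional<std::string&> reasonIfUnsupported,
                         const armnn::TensorInfo& input)
{
    return armnn::IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                                input.GetDataType(),
                                                &armnn::FalseFunc<>,   // Float16 rejected
                                                &armnn::TrueFunc<>,    // Float32 accepted
                                                &armnn::FalseFunc<>,   // QAsymmU8 rejected
                                                &armnn::FalseFunc<>,   // Signed32 rejected
                                                &armnn::FalseFunc<>);  // Boolean rejected
}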

◆ LayerEnumOf() [1/79]

constexpr LayerType armnn::LayerEnumOf ( const ActivationLayer )
constexpr

Definition at line 115 of file LayersFwd.hpp.

◆ LayerEnumOf() [2/79]

constexpr LayerType armnn::LayerEnumOf ( const AdditionLayer )
constexpr

Definition at line 116 of file LayersFwd.hpp.

◆ LayerEnumOf() [3/79]

constexpr LayerType armnn::LayerEnumOf ( const ArgMinMaxLayer )
constexpr

Definition at line 117 of file LayersFwd.hpp.

◆ LayerEnumOf() [4/79]

constexpr LayerType armnn::LayerEnumOf ( const BatchMatMulLayer )
constexpr

Definition at line 118 of file LayersFwd.hpp.

◆ LayerEnumOf() [5/79]

constexpr LayerType armnn::LayerEnumOf ( const BatchNormalizationLayer )
constexpr

Definition at line 119 of file LayersFwd.hpp.

◆ LayerEnumOf() [6/79]

constexpr LayerType armnn::LayerEnumOf ( const BatchToSpaceNdLayer )
constexpr

Definition at line 120 of file LayersFwd.hpp.

◆ LayerEnumOf() [7/79]

constexpr LayerType armnn::LayerEnumOf ( const BroadcastToLayer )
constexpr

Definition at line 121 of file LayersFwd.hpp.

◆ LayerEnumOf() [8/79]

constexpr LayerType armnn::LayerEnumOf ( const CastLayer )
constexpr

Definition at line 122 of file LayersFwd.hpp.

◆ LayerEnumOf() [9/79]

constexpr LayerType armnn::LayerEnumOf ( const ChannelShuffleLayer )
constexpr

Definition at line 123 of file LayersFwd.hpp.

◆ LayerEnumOf() [10/79]

constexpr LayerType armnn::LayerEnumOf ( const ComparisonLayer )
constexpr

Definition at line 124 of file LayersFwd.hpp.

◆ LayerEnumOf() [11/79]

constexpr LayerType armnn::LayerEnumOf ( const ConcatLayer )
constexpr

Definition at line 125 of file LayersFwd.hpp.

◆ LayerEnumOf() [12/79]

constexpr LayerType armnn::LayerEnumOf ( const ConstantLayer )
constexpr

Definition at line 126 of file LayersFwd.hpp.

◆ LayerEnumOf() [13/79]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp16ToFp32Layer )
constexpr

Definition at line 127 of file LayersFwd.hpp.

◆ LayerEnumOf() [14/79]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToFp16Layer )
constexpr

Definition at line 128 of file LayersFwd.hpp.

◆ LayerEnumOf() [15/79]

constexpr LayerType armnn::LayerEnumOf ( const Convolution2dLayer )
constexpr

Definition at line 129 of file LayersFwd.hpp.

◆ LayerEnumOf() [16/79]

constexpr LayerType armnn::LayerEnumOf ( const Convolution3dLayer )
constexpr

Definition at line 130 of file LayersFwd.hpp.

◆ LayerEnumOf() [17/79]

constexpr LayerType armnn::LayerEnumOf ( const DebugLayer )
constexpr

Definition at line 131 of file LayersFwd.hpp.

◆ LayerEnumOf() [18/79]

constexpr LayerType armnn::LayerEnumOf ( const DepthToSpaceLayer )
constexpr

Definition at line 132 of file LayersFwd.hpp.

◆ LayerEnumOf() [19/79]

constexpr LayerType armnn::LayerEnumOf ( const DepthwiseConvolution2dLayer )
constexpr

Definition at line 133 of file LayersFwd.hpp.

◆ LayerEnumOf() [20/79]

constexpr LayerType armnn::LayerEnumOf ( const DequantizeLayer )
constexpr

Definition at line 134 of file LayersFwd.hpp.

◆ LayerEnumOf() [21/79]

constexpr LayerType armnn::LayerEnumOf ( const DetectionPostProcessLayer )
constexpr

Definition at line 135 of file LayersFwd.hpp.

◆ LayerEnumOf() [22/79]

constexpr LayerType armnn::LayerEnumOf ( const DivisionLayer )
constexpr

Definition at line 136 of file LayersFwd.hpp.

◆ LayerEnumOf() [23/79]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseBinaryLayer )
constexpr

Definition at line 137 of file LayersFwd.hpp.

◆ LayerEnumOf() [24/79]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseUnaryLayer )
constexpr

Definition at line 138 of file LayersFwd.hpp.

◆ LayerEnumOf() [25/79]

constexpr LayerType armnn::LayerEnumOf ( const FakeQuantizationLayer )
constexpr

Definition at line 139 of file LayersFwd.hpp.

◆ LayerEnumOf() [26/79]

constexpr LayerType armnn::LayerEnumOf ( const FillLayer )
constexpr

Definition at line 140 of file LayersFwd.hpp.

◆ LayerEnumOf() [27/79]

constexpr LayerType armnn::LayerEnumOf ( const FloorLayer )
constexpr

Definition at line 141 of file LayersFwd.hpp.

◆ LayerEnumOf() [28/79]

constexpr LayerType armnn::LayerEnumOf ( const FullyConnectedLayer )
constexpr

Definition at line 142 of file LayersFwd.hpp.

◆ LayerEnumOf() [29/79]

constexpr LayerType armnn::LayerEnumOf ( const FusedLayer )
constexpr

Definition at line 143 of file LayersFwd.hpp.

◆ LayerEnumOf() [30/79]

constexpr LayerType armnn::LayerEnumOf ( const GatherLayer )
constexpr

Definition at line 144 of file LayersFwd.hpp.

◆ LayerEnumOf() [31/79]

constexpr LayerType armnn::LayerEnumOf ( const GatherNdLayer )
constexpr

Definition at line 145 of file LayersFwd.hpp.

◆ LayerEnumOf() [32/79]

constexpr LayerType armnn::LayerEnumOf ( const InputLayer )
constexpr

Definition at line 146 of file LayersFwd.hpp.

◆ LayerEnumOf() [33/79]

constexpr LayerType armnn::LayerEnumOf ( const InstanceNormalizationLayer )
constexpr

Definition at line 147 of file LayersFwd.hpp.

◆ LayerEnumOf() [34/79]

constexpr LayerType armnn::LayerEnumOf ( const L2NormalizationLayer )
constexpr

Definition at line 148 of file LayersFwd.hpp.

◆ LayerEnumOf() [35/79]

constexpr LayerType armnn::LayerEnumOf ( const LogicalBinaryLayer )
constexpr

Definition at line 149 of file LayersFwd.hpp.

◆ LayerEnumOf() [36/79]

constexpr LayerType armnn::LayerEnumOf ( const LogSoftmaxLayer )
constexpr

Definition at line 150 of file LayersFwd.hpp.

◆ LayerEnumOf() [37/79]

constexpr LayerType armnn::LayerEnumOf ( const LstmLayer )
constexpr

Definition at line 151 of file LayersFwd.hpp.

◆ LayerEnumOf() [38/79]

constexpr LayerType armnn::LayerEnumOf ( const MapLayer )
constexpr

Definition at line 152 of file LayersFwd.hpp.

◆ LayerEnumOf() [39/79]

constexpr LayerType armnn::LayerEnumOf ( const MaximumLayer )
constexpr

Definition at line 153 of file LayersFwd.hpp.

◆ LayerEnumOf() [40/79]

constexpr LayerType armnn::LayerEnumOf ( const MeanLayer )
constexpr

Definition at line 154 of file LayersFwd.hpp.

◆ LayerEnumOf() [41/79]

constexpr LayerType armnn::LayerEnumOf ( const MemCopyLayer )
constexpr

Definition at line 155 of file LayersFwd.hpp.

◆ LayerEnumOf() [42/79]

constexpr LayerType armnn::LayerEnumOf ( const MemImportLayer )
constexpr

Definition at line 156 of file LayersFwd.hpp.

◆ LayerEnumOf() [43/79]

constexpr LayerType armnn::LayerEnumOf ( const MergeLayer )
constexpr

Definition at line 157 of file LayersFwd.hpp.

◆ LayerEnumOf() [44/79]

constexpr LayerType armnn::LayerEnumOf ( const MinimumLayer )
constexpr

Definition at line 158 of file LayersFwd.hpp.

◆ LayerEnumOf() [45/79]

constexpr LayerType armnn::LayerEnumOf ( const MultiplicationLayer )
constexpr

Definition at line 159 of file LayersFwd.hpp.

◆ LayerEnumOf() [46/79]

constexpr LayerType armnn::LayerEnumOf ( const NormalizationLayer )
constexpr

Definition at line 160 of file LayersFwd.hpp.

◆ LayerEnumOf() [47/79]

constexpr LayerType armnn::LayerEnumOf ( const OutputLayer )
constexpr

Definition at line 161 of file LayersFwd.hpp.

◆ LayerEnumOf() [48/79]

constexpr LayerType armnn::LayerEnumOf ( const PadLayer )
constexpr

Definition at line 162 of file LayersFwd.hpp.

◆ LayerEnumOf() [49/79]

constexpr LayerType armnn::LayerEnumOf ( const PermuteLayer )
constexpr

Definition at line 163 of file LayersFwd.hpp.

◆ LayerEnumOf() [50/79]

constexpr LayerType armnn::LayerEnumOf ( const Pooling2dLayer )
constexpr

Definition at line 164 of file LayersFwd.hpp.

◆ LayerEnumOf() [51/79]

constexpr LayerType armnn::LayerEnumOf ( const Pooling3dLayer )
constexpr

Definition at line 165 of file LayersFwd.hpp.

◆ LayerEnumOf() [52/79]

constexpr LayerType armnn::LayerEnumOf ( const PreCompiledLayer )
constexpr

Definition at line 166 of file LayersFwd.hpp.

◆ LayerEnumOf() [53/79]

constexpr LayerType armnn::LayerEnumOf ( const PreluLayer )
constexpr

Definition at line 167 of file LayersFwd.hpp.

◆ LayerEnumOf() [54/79]

constexpr LayerType armnn::LayerEnumOf ( const QLstmLayer )
constexpr

Definition at line 169 of file LayersFwd.hpp.

◆ LayerEnumOf() [55/79]

constexpr LayerType armnn::LayerEnumOf ( const QuantizedLstmLayer )
constexpr

Definition at line 170 of file LayersFwd.hpp.

◆ LayerEnumOf() [56/79]

constexpr LayerType armnn::LayerEnumOf ( const QuantizeLayer )
constexpr

Definition at line 168 of file LayersFwd.hpp.

◆ LayerEnumOf() [57/79]

constexpr LayerType armnn::LayerEnumOf ( const RankLayer )
constexpr

Definition at line 171 of file LayersFwd.hpp.

◆ LayerEnumOf() [58/79]

constexpr LayerType armnn::LayerEnumOf ( const ReduceLayer )
constexpr

Definition at line 172 of file LayersFwd.hpp.

◆ LayerEnumOf() [59/79]

constexpr LayerType armnn::LayerEnumOf ( const ReshapeLayer )
constexpr

Definition at line 173 of file LayersFwd.hpp.

◆ LayerEnumOf() [60/79]

constexpr LayerType armnn::LayerEnumOf ( const ResizeLayer )
constexpr

Definition at line 174 of file LayersFwd.hpp.

◆ LayerEnumOf() [61/79]

constexpr LayerType armnn::LayerEnumOf ( const ReverseV2Layer )
constexpr

Definition at line 175 of file LayersFwd.hpp.

◆ LayerEnumOf() [62/79]

constexpr LayerType armnn::LayerEnumOf ( const ScatterNdLayer )
constexpr

Definition at line 176 of file LayersFwd.hpp.

◆ LayerEnumOf() [63/79]

constexpr LayerType armnn::LayerEnumOf ( const ShapeLayer )
constexpr

Definition at line 177 of file LayersFwd.hpp.

◆ LayerEnumOf() [64/79]

constexpr LayerType armnn::LayerEnumOf ( const SliceLayer )
constexpr

Definition at line 178 of file LayersFwd.hpp.

◆ LayerEnumOf() [65/79]

constexpr LayerType armnn::LayerEnumOf ( const SoftmaxLayer )
constexpr

Definition at line 179 of file LayersFwd.hpp.

◆ LayerEnumOf() [66/79]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToBatchNdLayer )
constexpr

Definition at line 180 of file LayersFwd.hpp.

◆ LayerEnumOf() [67/79]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToDepthLayer )
constexpr

Definition at line 181 of file LayersFwd.hpp.

◆ LayerEnumOf() [68/79]

constexpr LayerType armnn::LayerEnumOf ( const SplitterLayer )
constexpr

Definition at line 182 of file LayersFwd.hpp.

◆ LayerEnumOf() [69/79]

constexpr LayerType armnn::LayerEnumOf ( const StackLayer )
constexpr

Definition at line 183 of file LayersFwd.hpp.

◆ LayerEnumOf() [70/79]

constexpr LayerType armnn::LayerEnumOf ( const StandInLayer )
constexpr

Definition at line 184 of file LayersFwd.hpp.

◆ LayerEnumOf() [71/79]

constexpr LayerType armnn::LayerEnumOf ( const StridedSliceLayer )
constexpr

Definition at line 185 of file LayersFwd.hpp.

◆ LayerEnumOf() [72/79]

constexpr LayerType armnn::LayerEnumOf ( const SubtractionLayer )
constexpr

Definition at line 186 of file LayersFwd.hpp.

◆ LayerEnumOf() [73/79]

constexpr LayerType armnn::LayerEnumOf ( const SwitchLayer )
constexpr

Definition at line 187 of file LayersFwd.hpp.

◆ LayerEnumOf() [74/79]

constexpr LayerType armnn::LayerEnumOf ( const T *  = nullptr)
constexpr

◆ LayerEnumOf() [75/79]

constexpr LayerType armnn::LayerEnumOf ( const TileLayer )
constexpr

Definition at line 188 of file LayersFwd.hpp.

◆ LayerEnumOf() [76/79]

constexpr LayerType armnn::LayerEnumOf ( const TransposeConvolution2dLayer )
constexpr

Definition at line 190 of file LayersFwd.hpp.

◆ LayerEnumOf() [77/79]

constexpr LayerType armnn::LayerEnumOf ( const TransposeLayer )
constexpr

Definition at line 189 of file LayersFwd.hpp.

◆ LayerEnumOf() [78/79]

constexpr LayerType armnn::LayerEnumOf ( const UnidirectionalSequenceLstmLayer )
constexpr

Definition at line 191 of file LayersFwd.hpp.

◆ LayerEnumOf() [79/79]

constexpr LayerType armnn::LayerEnumOf ( const UnmapLayer )
constexpr

Definition at line 192 of file LayersFwd.hpp.

◆ LevelToString()

std::string armnn::LevelToString ( LogSeverity  level)
inline

Definition at line 22 of file Logging.hpp.

23 {
24  switch(level)
25  {
26  case LogSeverity::Trace:
27  return "Trace";
28  case LogSeverity::Debug:
29  return "Debug";
30  case LogSeverity::Info:
31  return "Info";
32  case LogSeverity::Warning:
33  return "Warning";
34  case LogSeverity::Error:
35  return "Error";
36  case LogSeverity::Fatal:
37  return "Fatal";
38  default:
39  return "Log";
40  }
41 }

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by ScopedRecord::ScopedRecord().
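
A minimal sketch (ours): turning a LogSeverity into a printable label.

#include <iostream>

void ReportFallback()
{
    armnn::LogSeverity level = armnn::LogSeverity::Warning;
    std::cout << "[" << armnn::LevelToString(level) << "] falling back to the reference backend\n";
    // Prints: [Warning] falling back to the reference backend
}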

◆ LogSoftmax()

void LogSoftmax ( Decoder< float > &  input,
Encoder< float > &  output,
const TensorInfo &  inputInfo,
const LogSoftmaxDescriptor &  descriptor 
)

Definition at line 27 of file LogSoftmax.cpp.

31 {
32  const unsigned int numDimensions = inputInfo.GetNumDimensions();
33 
34  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(ValidateAxis(descriptor.m_Axis, numDimensions),
35  "Axis index is not in range [-numDimensions, numDimensions).");
36 
37  unsigned int uAxis = descriptor.m_Axis < 0 ?
38  numDimensions - armnn::numeric_cast<unsigned int>(std::abs(descriptor.m_Axis)) :
39  armnn::numeric_cast<unsigned int>(descriptor.m_Axis);
40 
41  const TensorShape& inputShape = inputInfo.GetShape();
42  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
43  const unsigned int axisSize = inputShape[uAxis];
44  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
45  uAxis + 1,
46  inputShape.GetNumDimensions());
47 
48  for (unsigned int outer = 0; outer < outerSize; ++outer)
49  {
50  for (unsigned int inner = 0; inner < innerSize; ++inner)
51  {
52  // Find max
53  input[outer * axisSize * innerSize + inner];
54  float maxValue = input.Get();
55  for (unsigned int i = 1u; i < axisSize; ++i)
56  {
57  input[(outer * axisSize + i) * innerSize + inner];
58  maxValue = std::max(maxValue, input.Get());
59  }
60 
61  // Compute sum
62  float sum = 0.0f;
63  for (unsigned int i = 0u; i < axisSize; ++i)
64  {
65  input[(outer * axisSize + i) * innerSize + inner];
66  sum += std::exp((input.Get() - maxValue) * descriptor.m_Beta);
67  }
68 
69  // Compute log sum
70  const float logSum = std::log(sum);
71 
72  // Compute result
73  for (unsigned int i = 0u; i < axisSize; ++i)
74  {
75  const unsigned int index = (outer * axisSize + i) * innerSize + inner;
76 
77  input [index];
78  output[index];
79 
80  output.Set((input.Get() - maxValue) * descriptor.m_Beta - logSum);
81  }
82  }
83  }
84 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), SoftmaxDescriptor::m_Axis, SoftmaxDescriptor::m_Beta, and Encoder< IType >::Set().
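
A small end-to-end sketch (ours) that drives the reference LogSoftmax through the decoder/encoder helpers documented further below; buffer handling is simplified and the relevant headers are assumed to be available.

std::vector<float> inputData  = {1.0f, 2.0f, 3.0f, 4.0f};
std::vector<float> outputData(4);

armnn::TensorInfo tensorInfo(armnn::TensorShape({1, 4}), armnn::DataType::Float32);
armnn::LogSoftmaxDescriptor desc;   // alias of SoftmaxDescriptor
desc.m_Axis = 1;
desc.m_Beta = 1.0f;

auto decoder = armnn::MakeDecoder<float>(tensorInfo, inputData.data());
auto encoder = armnn::MakeEncoder<float>(tensorInfo, outputData.data());

armnn::LogSoftmax(*decoder, *encoder, tensorInfo, desc);
// Each output element is (x_i - max) * beta - log(sum_j exp((x_j - max) * beta)).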

◆ LstmImpl()

void LstmImpl ( const LstmDescriptor &  descriptor,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const TensorShape &  inputToOutputWeightsShape,
const TensorShape &  recurrentToOutputWeightsShape,
std::unique_ptr< Decoder< float >> &  inputData,
std::unique_ptr< Decoder< float >> &  outputStateIn,
std::unique_ptr< Decoder< float >> &  cellStateIn,
std::unique_ptr< Encoder< float >> &  outputStateOut,
std::unique_ptr< Encoder< float >> &  cellStateOut,
std::unique_ptr< Encoder< float >> &  output,
std::unique_ptr< Decoder< float >> &  cellStateOutDecoder,
std::unique_ptr< Decoder< float >> &  outputDecoder,
std::unique_ptr< Decoder< float >> &  inputToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  forgetGateBiasTensor,
std::unique_ptr< Decoder< float >> &  cellBiasTensor,
std::unique_ptr< Decoder< float >> &  outputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  projectionWeightsTensor,
std::unique_ptr< Decoder< float >> &  projectionBiasTensor,
std::unique_ptr< Decoder< float >> &  inputLayerNormWeights,
std::unique_ptr< Decoder< float >> &  forgetLayerNormWeights,
std::unique_ptr< Decoder< float >> &  cellLayerNormWeights,
std::unique_ptr< Decoder< float >> &  outputLayerNormWeights,
std::unique_ptr< Encoder< float >> &  inputGateScratch,
std::unique_ptr< Encoder< float >> &  cellScratch,
std::unique_ptr< Encoder< float >> &  forgetGateScratch,
std::unique_ptr< Encoder< float >> &  outputGateScratch,
std::unique_ptr< Decoder< float >> &  inputGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  cellScratchDecoder,
std::unique_ptr< Decoder< float >> &  forgetGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  outputGateScratchDecoder,
float  layerNormEpsilon 
)

Definition at line 13 of file Lstm.cpp.

56 {
57  // This is a porting of the LSTM::Eval() method in the Android code base
58  // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
59 
60  const TensorShape& inputShape = inputInfo.GetShape();
61  const DataType& outputType = outputInfo.GetDataType();
62 
63  const uint32_t nBatch = inputShape[0];
64  const uint32_t nInput = inputShape[1];
65 
66  const uint32_t nCell = inputToOutputWeightsShape[0];
67  const uint32_t nOutput = recurrentToOutputWeightsShape[1];
68 
69  const bool useCifg = descriptor.m_CifgEnabled;
70  const bool usePeephole = descriptor.m_PeepholeEnabled;
71  const bool useLayerNorm = descriptor.m_LayerNormEnabled;
72 
73  if (!useLayerNorm)
74  {
75  // Initialize scratch buffers with bias.
76  if (!useCifg)
77  {
78  VectorBatchVectorAssign(*inputGateBiasTensor,
79  nCell, nBatch, *inputGateScratch);
80  }
81  VectorBatchVectorAssign(*forgetGateBiasTensor,
82  nCell, nBatch, *forgetGateScratch);
83  VectorBatchVectorAssign(*cellBiasTensor,
84  nCell, nBatch, *cellScratch);
85  VectorBatchVectorAssign(*outputGateBiasTensor,
86  nCell, nBatch, *outputGateScratch);
87  }
88  else
89  {
90  // Initialize scratch buffers with zeroes.
91  if (!useCifg)
92  {
93  ZeroVector(*inputGateScratch, nCell * nBatch);
94  }
95  ZeroVector(*forgetGateScratch, nCell * nBatch);
96  ZeroVector(*cellScratch , nCell * nBatch);
97  ZeroVector(*outputGateScratch, nCell * nBatch);
98  }
99 
100  // For each batch and cell: compute input_weight * input.
101  if (!useCifg)
102  {
103  MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsTensor,
104  nCell, nInput, *inputData, nBatch, *inputGateScratch);
105  }
106  MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsTensor,
107  nCell, nInput, *inputData, nBatch, *forgetGateScratch);
108  MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsTensor,
109  nCell, nInput, *inputData, nBatch, *cellScratch);
110  MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsTensor,
111  nCell, nInput, *inputData, nBatch, *outputGateScratch);
112 
113  // For each batch and cell: compute recurrent_weight * output_state.
114  if (!useCifg)
115  {
116  MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsTensor,
117  nCell, nOutput, *outputStateIn, nBatch, *inputGateScratch);
118  }
119  MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsTensor,
120  nCell, nOutput, *outputStateIn, nBatch, *forgetGateScratch);
121  MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsTensor,
122  nCell, nOutput, *outputStateIn, nBatch, *cellScratch);
123  MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsTensor,
124  nCell, nOutput, *outputStateIn, nBatch, *outputGateScratch);
125 
126  // For each batch and cell: update input gate.
127  if (!useCifg)
128  {
129  if (usePeephole)
130  {
131  VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
132  nCell, *cellStateIn, nBatch, *inputGateScratch);
133  }
134  if (useLayerNorm)
135  {
136  MeanStddevNormalization(*inputGateScratchDecoder,
137  *inputGateScratch, nCell, nBatch, layerNormEpsilon);
138  VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
139  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
140  VectorBatchVectorAdd(*inputGateBiasTensor,
141  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
142  }
143  Activation(*inputGateScratchDecoder, *inputGateScratch,
144  TensorInfo({nCell, nBatch}, outputType),
145  ActivationFunction::Sigmoid, 0, 0);
146  }
147 
148  // For each batch and cell: update forget gate.
149  if (usePeephole)
150  {
151  VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
152  *cellStateIn, nBatch, *forgetGateScratch);
153  }
154  if (useLayerNorm)
155  {
156  MeanStddevNormalization(*forgetGateScratchDecoder,
157  *forgetGateScratch, nCell, nBatch, layerNormEpsilon);
158  VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
159  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
160  VectorBatchVectorAdd(*forgetGateBiasTensor,
161  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
162  }
163  Activation(*forgetGateScratchDecoder, *forgetGateScratch,
164  TensorInfo({nCell, nBatch}, outputType),
165  ActivationFunction::Sigmoid, 0, 0);
166 
167  // For each batch and cell: update the cell.
168  if (useLayerNorm)
169  {
170  MeanStddevNormalization(*cellScratchDecoder,
171  *cellScratch, nCell, nBatch, layerNormEpsilon);
172  VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
173  nCell, *cellScratchDecoder, nBatch, *cellScratch);
174  VectorBatchVectorAdd(*cellBiasTensor,
175  nCell, *cellScratchDecoder, nBatch, *cellScratch);
176  }
177 
178  VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
179 
180  ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
181  float a = 0;
182  float b = 0;
183  SetActivationParameters(descriptor.m_ActivationFunc, armnnActivationFunc, a, b);
184 
185  if (descriptor.m_ActivationFunc > 0)
186  {
187  Activation(*cellScratchDecoder, *cellScratch,
188  TensorInfo({nCell, nBatch}, outputType),
189  armnnActivationFunc, a, b);
190  }
191  if (useCifg)
192  {
193  Sub1Vector(*forgetGateScratchDecoder, nBatch * nCell, *forgetGateScratch);
 194  VectorVectorCwiseProductAccumulate(
 195  *cellScratchDecoder, *forgetGateScratchDecoder, nBatch * nCell, *cellStateOut);
196  }
197  else
198  {
 199  VectorVectorCwiseProductAccumulate(
 200  *cellScratchDecoder, *inputGateScratchDecoder, nBatch * nCell, *cellStateOut);
201  }
202  if (descriptor.m_ClippingThresCell > 0.0)
203  {
204  ClipVector(*cellStateOutDecoder, nBatch * nCell, descriptor.m_ClippingThresCell, *cellStateOut);
205  }
206 
207  // For each batch and cell: update the output gate.
208  if (usePeephole)
209  {
210  VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
211  nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
212  }
213  if (useLayerNorm)
214  {
215  MeanStddevNormalization(*outputGateScratchDecoder,
216  *outputGateScratch, nCell, nBatch, layerNormEpsilon);
217  VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
218  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
219  VectorBatchVectorAdd(*outputGateBiasTensor,
220  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
221  }
222  Activation(*outputGateScratchDecoder, *outputGateScratch,
223  TensorInfo({nCell, nBatch}, outputType),
224  ActivationFunction::Sigmoid, 0, 0);
225 
226  if (descriptor.m_ActivationFunc > 0)
227  {
228  Activation(*cellStateOutDecoder, *cellScratch,
229  TensorInfo({nCell, nBatch}, outputType),
230  armnnActivationFunc, a, b);
231  }
232 
233  VectorVectorCwiseProduct(*outputGateScratchDecoder, *cellScratchDecoder, nBatch * nCell, *outputGateScratch);
234 
235  // For each batch: update the projection and output_state.
236  if (descriptor.m_ProjectionEnabled)
237  {
238  if (projectionBiasTensor)
239  {
240  VectorBatchVectorAssign(*projectionBiasTensor,
241  nOutput, nBatch, *output);
242  }
243  MatrixBatchVectorMultiplyAccumulate(*projectionWeightsTensor,
244  nOutput, nCell, *outputGateScratchDecoder, nBatch, *output);
245 
246  if (descriptor.m_ClippingThresProj > 0.0)
247  {
248  ClipVector(*outputDecoder, nBatch * nOutput, descriptor.m_ClippingThresProj, *output);
249  }
250  }
251  else
252  {
253  CopyVector(*outputGateScratchDecoder, nBatch * nOutput, *output);
254  }
255 
256  CopyVector(*outputDecoder, nBatch * nOutput, *outputStateOut);
257 }

References Activation(), ClipVector(), CopyVector(), TensorInfo::GetDataType(), TensorInfo::GetShape(), LstmDescriptor::m_ActivationFunc, LstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, LstmDescriptor::m_LayerNormEnabled, LstmDescriptor::m_PeepholeEnabled, LstmDescriptor::m_ProjectionEnabled, MatrixBatchVectorMultiplyAccumulate(), MeanStddevNormalization(), SetActivationParameters(), Sigmoid, Sub1Vector(), VectorBatchVectorAdd(), VectorBatchVectorAssign(), VectorBatchVectorCwiseProduct(), VectorBatchVectorCwiseProductAccumulate(), VectorVectorCwiseProduct(), VectorVectorCwiseProductAccumulate(), and ZeroVector().

◆ MakeDecoder() [1/2]

std::unique_ptr< Decoder< int32_t > > MakeDecoder ( const TensorInfo &  info,
const void *  data 
)
inline

Definition at line 64 of file Decoders.hpp.

65 {
66  switch(info.GetDataType())
67  {
68  case DataType::QAsymmS8:
69  {
70  return std::make_unique<QASymmS8Decoder>(
71  static_cast<const int8_t*>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  case DataType::QAsymmU8:
76  {
77  return std::make_unique<QASymm8Decoder>(
78  static_cast<const uint8_t*>(data),
79  info.GetQuantizationScale(),
80  info.GetQuantizationOffset());
81  }
82  case DataType::QSymmS16:
83  {
84  return std::make_unique<QSymm16Decoder>(
85  static_cast<const int16_t*>(data),
86  info.GetQuantizationScale(),
87  info.GetQuantizationOffset());
88  }
89  case DataType::Float16:
90  {
91  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
92  }
93  case DataType::Float32:
94  {
95  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
96  }
97  case DataType::Signed32:
98  {
99  return MakeSigned32Decoder(info, data);
100  }
101  case DataType::QSymmS8:
102  {
103  if (info.HasPerAxisQuantization())
104  {
105  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
106  }
107  else
108  {
109  return std::make_unique<QSymmS8Decoder>(
110  static_cast<const int8_t*>(data),
111  info.GetQuantizationScale(),
112  info.GetQuantizationOffset());
113  }
114  }
 115  case DataType::Boolean:
 116  {
117  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
118  }
119  default:
120  {
121  throw InvalidArgumentException("Unsupported target Data Type!");
122  break;
123  }
124  }
125  return nullptr;
126 }

References Boolean, Float16, Float32, info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeDecoder() [2/2]

std::unique_ptr<Decoder<T> > armnn::MakeDecoder ( const TensorInfo &  info,
const void *  data = nullptr 
)
inline

Definition at line 64 of file Decoders.hpp.

65 {
66  switch(info.GetDataType())
67  {
68  case DataType::QAsymmS8:
69  {
70  return std::make_unique<QASymmS8Decoder>(
71  static_cast<const int8_t*>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  case DataType::QAsymmU8:
76  {
77  return std::make_unique<QASymm8Decoder>(
78  static_cast<const uint8_t*>(data),
79  info.GetQuantizationScale(),
80  info.GetQuantizationOffset());
81  }
82  case DataType::QSymmS16:
83  {
84  return std::make_unique<QSymm16Decoder>(
85  static_cast<const int16_t*>(data),
86  info.GetQuantizationScale(),
87  info.GetQuantizationOffset());
88  }
89  case DataType::Float16:
90  {
91  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
92  }
93  case DataType::Float32:
94  {
95  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
96  }
97  case DataType::Signed32:
98  {
99  return MakeSigned32Decoder(info, data);
100  }
101  case DataType::QSymmS8:
102  {
103  if (info.HasPerAxisQuantization())
104  {
105  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
106  }
107  else
108  {
109  return std::make_unique<QSymmS8Decoder>(
110  static_cast<const int8_t*>(data),
111  info.GetQuantizationScale(),
112  info.GetQuantizationOffset());
113  }
114  }
 115  case DataType::Boolean:
 116  {
117  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
118  }
119  default:
120  {
121  throw InvalidArgumentException("Unsupported target Data Type!");
122  break;
123  }
124  }
125  return nullptr;
126 }

References Boolean, Float16, Float32, info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.
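
A minimal sketch (ours): dequantizing a QAsymmU8 buffer through a float Decoder, using the same operator[]/Get() access pattern seen in the workload code above.

std::vector<uint8_t> quantized = {0, 128, 255};
armnn::TensorInfo qInfo(armnn::TensorShape({3}),
                        armnn::DataType::QAsymmU8,
                        0.1f,   // scale
                        128);   // zero point
auto decoder = armnn::MakeDecoder<float>(qInfo, quantized.data());
for (unsigned int i = 0; i < 3; ++i)
{
    (*decoder)[i];                 // position the decoder on element i
    float value = decoder->Get();  // (q - 128) * 0.1f, e.g. q = 128 -> 0.0f
}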

◆ MakeEncoder() [1/2]

std::unique_ptr< Encoder< int32_t > > MakeEncoder ( const TensorInfo &  info,
void *  data 
)
inline

Definition at line 19 of file Encoders.hpp.

20 {
21  switch(info.GetDataType())
22  {
 23  case DataType::QAsymmS8:
 24  {
25  return std::make_unique<QASymmS8Encoder>(
26  static_cast<int8_t*>(data),
27  info.GetQuantizationScale(),
28  info.GetQuantizationOffset());
29  }
 30  case DataType::QAsymmU8:
 31  {
32  return std::make_unique<QASymm8Encoder>(
33  static_cast<uint8_t*>(data),
34  info.GetQuantizationScale(),
35  info.GetQuantizationOffset());
36  }
37  case DataType::QSymmS8:
38  {
39  if (info.HasPerAxisQuantization())
40  {
41  return std::make_unique<QSymm8PerAxisEncoder>(static_cast<int8_t*>(data), info);
42  }
43  else
44  {
45  return std::make_unique<QSymmS8Encoder>(
46  static_cast<int8_t*>(data),
47  info.GetQuantizationScale(),
48  info.GetQuantizationOffset());
49  }
50  }
 51  case DataType::QSymmS16:
 52  {
53  if (info.HasPerAxisQuantization())
54  {
55  unsigned int axis = info.GetQuantizationDim().value();
56  auto axisDimensionality = info.GetShape()[axis];
57  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
58  return std::make_unique<QSymm16PerAxisEncoder>(
59  static_cast<int16_t*>(data),
60  params.second,
61  params.first,
62  axisDimensionality);
63  }
64  else
65  {
66  return std::make_unique<QSymm16Encoder>(
67  static_cast<int16_t *>(data),
68  info.GetQuantizationScale(),
69  info.GetQuantizationOffset());
70  }
71  }
 72  case DataType::Signed32:
 73  {
74  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
75  }
 76  case DataType::Float16:
 77  {
78  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
79  }
 80  case DataType::Float32:
 81  {
82  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
83  }
84  default:
85  {
86  throw InvalidArgumentException("Unsupported target Data Type!");
87  break;
88  }
89  }
90  return nullptr;
91 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeEncoder() [2/2]

std::unique_ptr<Encoder<T> > armnn::MakeEncoder ( const TensorInfo &  info,
void *  data = nullptr 
)
inline

Definition at line 19 of file Encoders.hpp.

20 {
21  switch(info.GetDataType())
22  {
 23  case DataType::QAsymmS8:
 24  {
25  return std::make_unique<QASymmS8Encoder>(
26  static_cast<int8_t*>(data),
27  info.GetQuantizationScale(),
28  info.GetQuantizationOffset());
29  }
 30  case DataType::QAsymmU8:
 31  {
32  return std::make_unique<QASymm8Encoder>(
33  static_cast<uint8_t*>(data),
34  info.GetQuantizationScale(),
35  info.GetQuantizationOffset());
36  }
37  case DataType::QSymmS8:
38  {
39  if (info.HasPerAxisQuantization())
40  {
41  return std::make_unique<QSymm8PerAxisEncoder>(static_cast<int8_t*>(data), info);
42  }
43  else
44  {
45  return std::make_unique<QSymmS8Encoder>(
46  static_cast<int8_t*>(data),
47  info.GetQuantizationScale(),
48  info.GetQuantizationOffset());
49  }
50  }
 51  case DataType::QSymmS16:
 52  {
53  if (info.HasPerAxisQuantization())
54  {
55  unsigned int axis = info.GetQuantizationDim().value();
56  auto axisDimensionality = info.GetShape()[axis];
57  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
58  return std::make_unique<QSymm16PerAxisEncoder>(
59  static_cast<int16_t*>(data),
60  params.second,
61  params.first,
62  axisDimensionality);
63  }
64  else
65  {
66  return std::make_unique<QSymm16Encoder>(
67  static_cast<int16_t *>(data),
68  info.GetQuantizationScale(),
69  info.GetQuantizationOffset());
70  }
71  }
 72  case DataType::Signed32:
 73  {
74  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
75  }
 76  case DataType::Float16:
 77  {
78  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
79  }
 80  case DataType::Float32:
 81  {
82  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
83  }
84  default:
85  {
86  throw InvalidArgumentException("Unsupported target Data Type!");
87  break;
88  }
89  }
90  return nullptr;
91 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.
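
The mirror image of the decoder example above, again as a sketch of ours: writing float values into a QSymmS16 buffer through an Encoder.

std::vector<int16_t> quantized(4);
armnn::TensorInfo qInfo(armnn::TensorShape({4}),
                        armnn::DataType::QSymmS16,
                        0.01f);   // symmetric quantization: scale only, zero point 0
auto encoder = armnn::MakeEncoder<float>(qInfo, quantized.data());
for (unsigned int i = 0; i < 4; ++i)
{
    (*encoder)[i];             // position the encoder on element i
    encoder->Set(0.25f * i);   // stored as the nearest multiple of the scale
}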

◆ MakeInfo()

arm_compute::DetectionPostProcessLayerInfo armnn::MakeInfo ( const DetectionPostProcessDescriptor &  descriptor)

Definition at line 17 of file NeonDetectionPostProcessWorkload.cpp.

18 {
19  return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
20  descriptor.m_MaxClassesPerDetection,
21  descriptor.m_NmsScoreThreshold,
22  descriptor.m_NmsIouThreshold,
23  descriptor.m_NumClasses,
24  { descriptor.m_ScaleX,
25  descriptor.m_ScaleY,
26  descriptor.m_ScaleW,
27  descriptor.m_ScaleH },
28  descriptor.m_UseRegularNms,
29  descriptor.m_DetectionsPerClass);
30 }

References DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, and DetectionPostProcessDescriptor::m_UseRegularNms.

Referenced by NeonDetectionPostProcessValidate().

◆ MakeOptimizations()

Optimizer::Optimizations armnn::MakeOptimizations ( Args &&...  args)

Definition at line 43 of file Optimizer.hpp.

44 {
45  Optimizer::Optimizations optimizations;
46 
47  Append(optimizations, std::forward<Args>(args)...);
48 
49  return optimizations;
50 }

References Append().

Referenced by ApplyBackendOptimizations(), and Optimize().
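
A hedged sketch (ours) of composing a pass list; graph is assumed to be an armnn::Graph built elsewhere, and the pass names are examples from armnn::optimizations that may differ between releases.

armnn::Optimizer::Optimizations passes = armnn::MakeOptimizations(
    armnn::optimizations::SquashEqualPermuteSiblings(),
    armnn::optimizations::OptimizeInversePermutes());

armnn::Optimizer::Pass(graph, passes);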

◆ MakeOptional()

Optional<T> armnn::MakeOptional ( Args &&...  args)

Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object.

Definition at line 305 of file Optional.hpp.

306 {
307  return Optional<T>(CONSTRUCT_IN_PLACE, std::forward<Args>(args)...);
308 }

References CONSTRUCT_IN_PLACE.
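
A minimal sketch (ours): building an Optional<TensorInfo> in place instead of constructing the TensorInfo first and copying it in.

armnn::Optional<armnn::TensorInfo> biasInfo =
    armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({16}), armnn::DataType::Float32);

if (biasInfo.has_value())
{
    const armnn::TensorInfo& info = biasInfo.value();   // constructed in place, no extra copy
}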

◆ MakeTransformIterator()

constexpr TransformIterator<Function, Iterator> armnn::MakeTransformIterator ( Iterator  i,
Function  f 
)
constexpr

Definition at line 90 of file TransformIterator.hpp.

91 {
92  return TransformIterator<Function, Iterator>(i, f);
93 }
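
A sketch (ours) of wrapping a container iterator so that dereferencing applies a transform, assuming the adaptor exposes the usual increment, comparison and dereference operators.

std::vector<int> values = {1, 2, 3};
auto doubler = [](int v) { return v * 2; };

auto first = armnn::MakeTransformIterator(values.begin(), doubler);
auto last  = armnn::MakeTransformIterator(values.end(), doubler);

for (auto it = first; it != last; ++it)
{
    // Dereferencing applies the functor: *it yields 2, 4, 6; 'values' itself is untouched.
}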

◆ MirrorPad()

void MirrorPad ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const ITensorHandle *  inputHandle,
ITensorHandle *  outputHandle,
const PadQueueDescriptor &  data 
)

Definition at line 59 of file MirrorPad.cpp.

64 {
65  auto padList = data.m_Parameters.m_PadList;
66  PaddingMode paddingMode = data.m_Parameters.m_PaddingMode;
67 
68  TensorShape outputShape = outputInfo.GetShape();
69  TensorShape inputShape = inputInfo.GetShape();
70 
71  unsigned int numOutputElements = outputInfo.GetNumElements();
72  unsigned int numInputDimensions = inputShape.GetNumDimensions();
73  assert(numInputDimensions == outputShape.GetNumDimensions());
74 
75  // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1.
76  // If padding mode is Symmetric then both paddings must be no greater than inputShape(i).
77  const unsigned int isReflect = static_cast<unsigned int>(paddingMode == PaddingMode::Reflect);
78  for(unsigned int i = 0; i < padList.size(); ++i)
79  {
80  if(padList.at(i).first > (inputShape[i] - isReflect) ||
81  padList.at(i).second > (inputShape[i] - isReflect))
82  {
83  throw armnn::InvalidArgumentException("Paddings must be less (Reflect) or "
84  "equal (Symmetric) to the dimension size.");
85  }
86  }
87 
88  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
89  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
90 
91  Decoder<float>& input = *inputData;
92  Encoder<float>& output = *outData;
93 
94  for(unsigned int idx = 0; idx < numOutputElements; ++idx)
95  {
96  // Get the coordinates of the current index in vector form. E.g inx 1 = [0, 0, 0, 1 ]
97  const std::vector<unsigned int> coord = IndexToCoord(outputShape, idx);
98 
99  std::vector<unsigned int> dimensions;
100  std::vector<unsigned int> coords;
101 
102  for(unsigned int i = 0; i < numInputDimensions; ++i)
103  {
104  dimensions.emplace_back(i);
105  coords.emplace_back(coord[i]);
106  }
107 
108  auto isInPadding = [&](unsigned int i)
109  {
110  return (coords[i] < padList[i].first || coords[i] > inputShape[i] + padList[i].first - 1);
111  };
112 
113  auto getReflectIndex = [&](unsigned int i) -> unsigned int
114  {
115  if(isInPadding(i))
116  {
117  if(coords[i] < padList[i].first)
118  {
119  return padList[i].first - coords[i];
120  }
121  else
122  {
123  return 2 * inputShape[i] + padList[i].first - 2 - coords[i];
124  }
125  }
126  return coords[i] - padList[i].first;
127  };
128 
129  auto getSymmetricIndex = [&](unsigned int i) -> unsigned int
130  {
131  if(isInPadding(i))
132  {
133  if(coords[i] < padList[i].first)
134  {
135  return padList[i].first - coords[i] - 1;
136  }
137  else
138  {
139  return 2 * inputShape[i] + padList[i].first - 1 - coords[i];
140  }
141  }
142  return coords[i] - padList[i].first;
143  };
144 
145  // Location of the value in the input tensor to use in the output.
146  std::vector<unsigned int> coordOfInput;
147 
148  // any_of works as a loop here to check if any of the dimensions are in the padding.
149  // If dimensions is in the padding area, then create the coordinates of the location in the
150  // input tensor to use in the output.
151  // E.g.
152  // Input tensor = [ 1, 2, 3 ], Rank = 1.
153  // Output tensor = [ 2, 1, 2, 3, 1 ] if Reflect or [ 1, 1, 2, 3, 3 ] if Symmetric with a padding of (1, 1).
154  // So it will either return [ 1 ] or [ 0 ] which is used to set the first value in the output tensor and so on.
155  if(std::any_of(dimensions.begin(), dimensions.end(), isInPadding))
156  {
157  switch(paddingMode)
158  {
159  case PaddingMode::Reflect:
160  {
161  for(unsigned int i = 0; i < numInputDimensions; ++i)
162  {
163  coordOfInput.emplace_back(getReflectIndex(i));
164  }
165  break;
166  }
167  case PaddingMode::Symmetric:
168  {
169  for(unsigned int i = 0; i < numInputDimensions; ++i)
170  {
171  coordOfInput.emplace_back(getSymmetricIndex(i));
172  }
173  break;
174  }
175  default:
176  throw InvalidArgumentException("Padding mode not supported.");
177  break;
178  }
179  }
180  else
181  {
182  for(unsigned int i = 0; i < numInputDimensions; ++i)
183  {
184  coordOfInput.emplace_back(coord[i] - padList[i].first);
185  }
186  }
187 
188  // Set output value using the coordinate of the input value to use.
189  const unsigned int indexOfInput = CoordToIndex(inputShape, coordOfInput);
190 
191  input[indexOfInput];
192  auto inputValue = input.Get();
193 
194  output[idx];
195  output.Set(inputValue);
196  }
197 }

References Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PaddingMode, PadDescriptor::m_PadList, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), Reflect, Encoder< IType >::Set(), and Symmetric.

◆ MockTensorHandleFactoryId()

constexpr const char* armnn::MockTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file MockTensorHandleFactory.hpp.

15 {
16  return "Arm/Mock/TensorHandleFactory";
17 }

Referenced by MockTensorHandleFactory::GetIdStatic().

◆ NeonAbsWorkloadValidate()

arm_compute::Status NeonAbsWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonAbsWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEAbsLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonActivationWorkloadValidate()

arm_compute::Status NeonActivationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ActivationDescriptor &  descriptor 
)

Definition at line 17 of file NeonActivationWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
 24  const arm_compute::ActivationLayerInfo activationLayerInfo =
 25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
 26 
27  return arm_compute::NEActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }

Referenced by NeonLayerSupport::IsActivationSupported().

◆ NeonAdditionWorkloadValidate()

arm_compute::Status NeonAdditionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 20 of file NeonAdditionWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
26  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
27  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
28 
29  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
30  activationDescriptor);
31 
32  return arm_compute::NEArithmeticAddition::validate(&aclInput0,
33  &aclInput1,
34  &aclOutput,
35  arm_compute::ConvertPolicy::SATURATE,
36  activationInfo);
37 }

Referenced by NeonLayerSupport::IsAdditionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().
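
A short sketch (ours) of calling one of these validate helpers directly; it assumes the trailing fused-activation parameter is an optional pointer (as shown above) and that arm_compute::Status exposes error_code()/error_description().

// input0Info/input1Info/outputInfo are assumed armnn::TensorInfo objects describing the tensors.
arm_compute::Status status = armnn::NeonAdditionWorkloadValidate(input0Info,
                                                                 input1Info,
                                                                 outputInfo,
                                                                 nullptr);   // no fused activation
if (status.error_code() != arm_compute::ErrorCode::OK)
{
    std::string why = status.error_description();   // reason reported by the Compute Library
}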

◆ NeonArgMinMaxWorkloadValidate()

arm_compute::Status NeonArgMinMaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ArgMinMaxDescriptor &  descriptor 
)

Definition at line 31 of file NeonArgMinMaxWorkload.cpp.

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }

Referenced by NeonLayerSupport::IsArgMinMaxSupported().

◆ NeonBackendId()

constexpr const char* armnn::NeonBackendId ( )
constexpr

Definition at line 10 of file NeonBackendId.hpp.

10 { return "CpuAcc"; }

Referenced by NeonBackend::GetIdStatic().

◆ NeonBatchMatMulValidate()

arm_compute::Status NeonBatchMatMulValidate ( const TensorInfo &  inputInfoX,
const TensorInfo &  inputInfoY,
const TensorInfo &  outputInfo,
const BatchMatMulDescriptor &  descriptor,
const bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file NeonBatchMatMulWorkload.cpp.

25 {
26  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
27  {
28  throw Exception("Support for adjoint not implemented.");
29  }
30  if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
31  {
32  throw Exception("Only supported the MatMul in the last 2 dimensions");
33  }
34 
35  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
36  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
37  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
38 
39  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
40  aclInputInfoX.set_are_values_constant(false);
41  aclInputInfoY.set_are_values_constant(false);
42 
43  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
44  activationDescriptor);
45 
46  arm_compute::MatMulInfo matMulInfo;
47  matMulInfo.adj_lhs(descriptor.m_TransposeX);
48  matMulInfo.adj_rhs(descriptor.m_TransposeY);
49 
50  arm_compute::CpuMatMulSettings settings;
51  settings.fast_math(isFastMathEnabled);
52 
53  return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
54  activationInfo);
55 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, and NCHW.

Referenced by NeonLayerSupport::IsBatchMatMulSupported().

◆ NeonBatchNormalizationValidate()

arm_compute::Status NeonBatchNormalizationValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  mean,
const TensorInfo &  var,
const TensorInfo &  beta,
const TensorInfo &  gamma,
const BatchNormalizationDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 24 of file NeonBatchNormalizationWorkload.cpp.

32 {
33  const arm_compute::TensorInfo aclInputInfo =
34  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
35  const arm_compute::TensorInfo aclOutputInfo =
36  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclMeanInfo =
38  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
39  const arm_compute::TensorInfo aclVarInfo =
40  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclBetaInfo =
42  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclGammaInfo =
44  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
45 
46  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
47  activationDescriptor);
48 
49  return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
50  &aclOutputInfo,
51  &aclMeanInfo,
52  &aclVarInfo,
53  &aclBetaInfo,
54  &aclGammaInfo,
55  descriptor.m_Eps,
56  activationInfo);
57 }

Referenced by NeonLayerSupport::IsBatchNormalizationSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonBatchToSpaceNdWorkloadValidate()

arm_compute::Status NeonBatchToSpaceNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const BatchToSpaceNdDescriptor &  descriptor 
)

Definition at line 15 of file NeonBatchToSpaceNdWorkload.cpp.

18 {
19  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21 
22  arm_compute::Status statusBatchToSpace = arm_compute::Status(arm_compute::ErrorCode::OK);
23  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
25 
26  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
27  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
28 
29  // When a spatial dimension is missing (rank=3) set W to 1
30  const unsigned int rank = input.GetNumDimensions();
31  if (rank == 3)
32  {
33  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
34  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
35 
36  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
37  {
38  // In ACL dimensions are right to left: C, W, H, N
39  aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
40  aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
41  }
42  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
43  {
44  // In ACL dimensions are right to left: W, H, C, N
45  aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
46  aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
47  }
48  else
49  {
50  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
51  }
52 
53  statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
54  statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
55  }
56 
57  // ArmNN blockShape is [H, W]; ACL asks for W, H
58  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
59  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
60 
61  const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
62 
63  statusBatchToSpace = arm_compute::NEBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
64  blockWidth,
65  blockHeight,
66  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
67  cropInfo);
68 
69  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
70  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
71  statusBatchToSpace.error_code() == arm_compute::ErrorCode::OK)
72  {
73  return arm_compute::Status(arm_compute::ErrorCode::OK,
74  "All BatchToSpace layers validate status OK.");
75  }
76  else
77  {
78  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
79  "BatchToSpace layer validate status failed."
80  + statusBatchToSpace.error_description()
81  + statusReshapeInput.error_description()
82  + statusReshapeOutput.error_description());
83  }
84 }

Referenced by NeonLayerSupport::IsBatchToSpaceNdSupported().
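An illustrative 4D NHWC descriptor (an assumption, not from the sources): Arm NN orders m_BlockShape as [H, W], while ACL's NEBatchToSpaceLayer takes blockWidth and blockHeight separately, which is why the order is swapped above.

    #include <armnn/Descriptors.hpp>

    armnn::BatchToSpaceNdDescriptor MakeBatchToSpaceNdDescriptor()
    {
        armnn::BatchToSpaceNdDescriptor descriptor;
        descriptor.m_BlockShape = { 2, 2 };               // [blockH, blockW]
        descriptor.m_Crops      = { { 0, 0 }, { 0, 0 } }; // (before, after) crops for H and W
        descriptor.m_DataLayout = armnn::DataLayout::NHWC;
        return descriptor;
    }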

◆ NeonCastValidate()

arm_compute::Status NeonCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file NeonCastWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
25 }

Referenced by NeonLayerSupport::IsCastSupported().

◆ NeonChannelShuffleValidate()

arm_compute::Status NeonChannelShuffleValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ChannelShuffleDescriptor &  descriptor 
)

Definition at line 17 of file NeonChannelShuffleWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  // In Arm NN and in NNAPI, the channel shuffle implementation is data-layout agnostic and takes an axis parameter.
25  // The channel shuffle implementation for Neon depends on the data layout and does not take an axis parameter;
26  // it only supports channel shuffle for 4D tensors in dimension C (axis 1 or 3).
27  arm_compute::DataLayout aclDataLayout;
28  if (input.GetNumDimensions() == 4)
29  {
30  switch (descriptor.m_Axis)
31  {
32  case 1:
33  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
34  break;
35  case 3:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
37  break;
38  default:
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
40  }
41  aclInputInfo.set_data_layout(aclDataLayout);
42  aclOutputInfo.set_data_layout(aclDataLayout);
43  return arm_compute::NEChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
44  }
45  else
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
48  }
49 }

Referenced by NeonLayerSupport::IsChannelShuffleSupported().
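An illustrative descriptor (an assumption): on CpuAcc only 4D tensors with the channel axis at position 1 (treated as NCHW) or 3 (treated as NHWC) pass this validation.

    #include <armnn/Descriptors.hpp>

    armnn::ChannelShuffleDescriptor MakeChannelShuffleDescriptor()
    {
        armnn::ChannelShuffleDescriptor descriptor;
        descriptor.m_NumGroups = 2;   // channels are divided into 2 groups and interleaved
        descriptor.m_Axis      = 3;   // channels-last, mapped to arm_compute::DataLayout::NHWC above
        return descriptor;
    }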

◆ NeonComparisonWorkloadValidate()

arm_compute::Status NeonComparisonWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ComparisonDescriptor &  descriptor 
)

Definition at line 16 of file NeonComparisonWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput0 = BuildArmComputeTensorInfo(input0);
22  const arm_compute::TensorInfo aclInput1 = BuildArmComputeTensorInfo(input1);
23  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
26 
27  const arm_compute::Status aclStatus = arm_compute::NEElementwiseComparison::validate(&aclInput0,
28  &aclInput1,
29  &aclOutput,
30  comparisonOperation);
31  return aclStatus;
32 }

Referenced by NeonLayerSupport::IsComparisonSupported().

◆ NeonConcatWorkloadValidate()

arm_compute::Status NeonConcatWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo &  output,
const OriginsDescriptor &  descriptor 
)

Definition at line 27 of file NeonConcatWorkload.cpp.

31 {
32  std::vector<arm_compute::TensorInfo> aclInputs;
33  for (const TensorInfo* input : inputs)
34  {
35  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
36  aclInputs.emplace_back(aclInputInfo);
37  }
38  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
39  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
40  for (arm_compute::ITensorInfo& input : aclInputs)
41  {
42  aclInputPtrs.emplace_back(&input);
43  }
44 
45  size_t aclAxis = CalcAxis(descriptor);
46  return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
47 }

Referenced by NeonLayerSupport::IsConcatSupported().

◆ NeonConstantWorkloadValidate()

arm_compute::Status NeonConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 20 of file NeonConstantWorkload.cpp.

21 {
22  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  std::array<arm_compute::DataType,9> supportedTypes = {
25  arm_compute::DataType::BFLOAT16,
26  arm_compute::DataType::F16,
27  arm_compute::DataType::F32,
28  arm_compute::DataType::QASYMM8,
29  arm_compute::DataType::QASYMM8_SIGNED,
30  arm_compute::DataType::QSYMM16,
31  arm_compute::DataType::QSYMM8,
32  arm_compute::DataType::QSYMM8_PER_CHANNEL,
33  arm_compute::DataType::S32
34  };
35  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
36 
37  if (it != end(supportedTypes))
38  {
39  return arm_compute::Status{};
40  }
41  else
42  {
43  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
44  }
45 }

Referenced by NeonLayerSupport::IsConstantSupported().

◆ NeonConvertFp16ToFp32WorkloadValidate()

arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file NeonConvertFp16ToFp32Workload.cpp.

20 {
21  // Fall back to the portable software implementation if Compute Library's NECast won't work, so
22  // this method always returns success.
23 
24  armnn::IgnoreUnused(input);
25  armnn::IgnoreUnused(output);
26  return arm_compute::Status();
27 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp16ToFp32Supported().

◆ NeonConvertFp32ToFp16WorkloadValidate()

arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 21 of file NeonConvertFp32ToFp16Workload.cpp.

22 {
23  // Fall back to the portable software implementation if Compute Library's NECast won't work, so
24  // this method always returns success.
25 
26  armnn::IgnoreUnused(input);
27  armnn::IgnoreUnused(output);
28  return arm_compute::Status();
29 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp32ToFp16Supported().

◆ NeonConvolution2dWorkloadValidate()

arm_compute::Status NeonConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 24 of file NeonConvolution2dWorkload.cpp.

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
36 
37  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
38  descriptor.m_DilationY);
39 
40  arm_compute::TensorInfo aclBiasesInfo;
41  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
42 
43  if (descriptor.m_BiasEnabled)
44  {
45  if (!biases.has_value())
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
48  "ArmNN NeonConvolution2dWorkload has empty bias value."};
49  }
50  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
51  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
52  optionalAclBiasesInfo = &aclBiasesInfo;
53  }
54 
55  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
56 
57  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
58  activationDescriptor);
59 
60  return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
61  &aclWeightsInfo,
62  optionalAclBiasesInfo,
63  &aclOutputInfo,
64  layerInfo,
65  arm_compute::WeightsInfo(),
66  aclDilationInfo,
67  activationInfo,
68  isFastMathEnabled);
69 }

Referenced by NeonLayerSupport::IsConvolution2dSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonConvolution3dWorkloadValidate()

arm_compute::Status NeonConvolution3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution3dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 24 of file NeonConvolution3dWorkload.cpp.

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  if (!biases.has_value())
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
41  "ArmNN NeonConvolution3dWorkload has empty bias value."};
42  }
43 
44  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
45  optionalAclBiasesInfo = &aclBiasesInfo;
46  }
47  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
48 
49  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
50  isFastMathEnabled,
51  activationDescriptor);
52 
53  return arm_compute::NEConv3D::validate(&aclInputInfo,
54  &aclWeightsInfo,
55  optionalAclBiasesInfo,
56  &aclOutputInfo,
57  aclConv3DInfo);
58 }

Referenced by NeonLayerSupport::IsConvolution3dSupported().

◆ NeonDepthToSpaceWorkloadValidate()

arm_compute::Status NeonDepthToSpaceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthToSpaceDescriptor &  descriptor 
)

Definition at line 19 of file NeonDepthToSpaceWorkload.cpp.

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsDepthToSpaceSupported().

◆ NeonDepthwiseConvolutionWorkloadValidate()

arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 29 of file NeonDepthwiseConvolutionWorkload.cpp.

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38 
39  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40  //
41  // ACL format for weights for depthwise is:
42  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43  // - [1, C, H, W] for [N, C, H, W] input/output layout
44  //
45  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47  // so we do the permute here for the TensorInfo weights.
48  unsigned int aclDepthMultiplier;
49  TensorInfo weightsPermuted;
50  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
51 
52  // Convert the weights into the compute library format
53  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
54  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
55 
56  arm_compute::TensorInfo aclBiasesInfo;
57  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
58  if (descriptor.m_BiasEnabled)
59  {
60  if(!biases.has_value())
61  {
62  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
63  "ArmNN NeonDepthwiseConvolutionWorkload has empty bias value."};
64  }
65  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
66  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
67  optionalAclBiasesInfo = &aclBiasesInfo;
68  }
69 
70  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
71  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
72  descriptor.m_DilationY);
73 
74  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
75  activationDescriptor);
76 
77  return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
78  &aclWeightsInfo,
79  optionalAclBiasesInfo,
80  &aclOutputInfo,
81  aclPadStrideInfo,
82  aclDepthMultiplier,
83  activationInfo,
84  aclDilationInfo);
85 }

Referenced by NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonDequantizeWorkloadValidate()

arm_compute::Status NeonDequantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 22 of file NeonDequantizeWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::NEDequantizationLayer::validate(&aclInput, &aclOutput);
29 }

Referenced by NeonLayerSupport::IsDequantizeSupported().

◆ NeonDetected()

bool NeonDetected ( )

Definition at line 39 of file Utils.cpp.

40 {
41 #if !defined(__APPLE__)
42 #if !defined(ARMNN_BUILD_BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
43  auto hwcaps= getauxval(AT_HWCAP);
44 #endif
45 
46 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__aarch64__)
47 
48  if (hwcaps & HWCAP_ASIMD)
49  {
50  // On an arm64 device with Neon.
51  return true;
52  }
53  else
54  {
55  // On an arm64 device without Neon.
56  return false;
57  }
58 
59 #endif
60 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__arm__)
61 
62  if (hwcaps & HWCAP_NEON)
63  {
64  // On an armhf device with Neon.
65  return true;
66  }
67  else
68  {
69  // On an armhf device without Neon.
70  return false;
71  }
72 
73 #endif
74 #endif
75 
76  // This method of Neon detection is only supported on Linux so in order to prevent a false negative
77  // we will return true in cases where detection did not run.
78  return true;
79 }

◆ NeonDetectionPostProcessValidate()

arm_compute::Status NeonDetectionPostProcessValidate ( const TensorInfo &  boxEncodings,
const TensorInfo &  scores,
const TensorInfo &  anchors,
const TensorInfo &  detectionBoxes,
const TensorInfo &  detectionClasses,
const TensorInfo &  detectionScores,
const TensorInfo &  numDetections,
const DetectionPostProcessDescriptor &  descriptor 
)

Definition at line 32 of file NeonDetectionPostProcessWorkload.cpp.

40 {
41  arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
42 
43  const arm_compute::TensorInfo aclBoxEncodings =
44  armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
45 
46  const arm_compute::TensorInfo aclScores =
47  armcomputetensorutils::BuildArmComputeTensorInfo(scores);
48 
49  const arm_compute::TensorInfo aclAnchors =
50  armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
51 
52  arm_compute::TensorInfo aclDetectionBoxes =
53  armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
54 
55  arm_compute::TensorInfo aclDetectionClasses =
56  armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
57 
58  arm_compute::TensorInfo aclDetectionScores =
59  armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
60 
61  arm_compute::TensorInfo aclNumDetections =
62  armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
63 
64  return arm_compute::NEDetectionPostProcessLayer::validate(
65  &aclBoxEncodings,
66  &aclScores,
67  &aclAnchors,
68  &aclDetectionBoxes,
69  &aclDetectionClasses,
70  &aclDetectionScores,
71  &aclNumDetections,
72  info);
73 }

References info, and MakeInfo().

◆ NeonDivisionWorkloadValidate()

arm_compute::Status NeonDivisionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file NeonDivisionWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::NEElementwiseDivision::validate(&aclInput0,
31  &aclInput1,
32  &aclOutput,
33  activationInfo);
34 }

Referenced by NeonLayerSupport::IsDivisionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonElementwiseBinaryWorkloadValidate()

arm_compute::Status NeonElementwiseBinaryWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ElementwiseBinaryDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 20 of file NeonElementwiseBinaryWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
27  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
28  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
29 
30  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
31  activationDescriptor);
32 
33  switch (descriptor.m_Operation)
34  {
35  case BinaryOperation::Power:
36  return arm_compute::NEElementwisePower::validate(&aclInput0,
37  &aclInput1,
38  &aclOutput,
39  activationInfo);
40  case BinaryOperation::SqDiff:
41  return arm_compute::NEElementwiseSquaredDiff::validate(&aclInput0,
42  &aclInput1,
43  &aclOutput,
44  activationInfo);
45  default:
46  throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
47  }
48 }

Referenced by IsLayerTypeSupported().

◆ NeonExpWorkloadValidate()

arm_compute::Status NeonExpWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonExpWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEExpLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonFullyConnectedWorkloadValidate()

arm_compute::Status NeonFullyConnectedWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 24 of file NeonFullyConnectedWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
32  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
33  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
34  aclWeights.set_are_values_constant(weights.IsConstant());
35 
36  arm_compute::TensorInfo aclBiases;
37  arm_compute::TensorInfo* optionalAclBiases = nullptr;
38  if (descriptor.m_BiasEnabled)
39  {
40  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
41  biases.has_value(),
42  "NeonFullyConnectedWorkload: Bias was enabled in the descriptor but no value was supplied.");
43  aclBiases = BuildArmComputeTensorInfo(biases.value());
44  aclBiases.set_are_values_constant(biases.value().IsConstant());
45  optionalAclBiases = &aclBiases;
46  }
47 
48  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
49  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
50  return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
51  &aclWeights,
52  optionalAclBiases,
53  &aclOutput,
54  fullyConnectedLayerInfo);
55 }

Referenced by NeonLayerSupport::IsFullyConnectedSupported(), and NeonBackend::OptimizeSubgraphView().
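A sketch of the bias contract checked above (the shapes are assumptions): when m_BiasEnabled is set, the Optional passed in must hold a TensorInfo, otherwise validation is rejected before NEFullyConnectedLayer::validate is reached.

    #include <armnn/Descriptors.hpp>
    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>

    void PrepareFullyConnectedBias()
    {
        armnn::FullyConnectedDescriptor descriptor;
        descriptor.m_BiasEnabled = true;

        armnn::TensorInfo biasInfo({ 10 }, armnn::DataType::Float32);
        armnn::Optional<armnn::TensorInfo> biases(biasInfo);   // must carry a value when m_BiasEnabled is true
    }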

◆ NeonFusedWorkloadValidate()

arm_compute::Status NeonFusedWorkloadValidate ( const std::vector< std::reference_wrapper< TensorInfo >> &  inputInfos,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputInfos,
const FusedDescriptor &  fusedDescriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 22 of file NeonFusedWorkload.cpp.

26 {
27  std::vector<arm_compute::TensorInfo> actInputInfos;
28  actInputInfos.reserve(inputInfos.size());
29  for (size_t i = 0u; i < inputInfos.size(); ++i)
30  {
31  actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
32  }
33 
34  std::vector<arm_compute::TensorInfo> actOutputInfos;
35  actOutputInfos.reserve(outputInfos.size());
36  for (size_t i = 0u; i < outputInfos.size(); ++i)
37  {
38  actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
39  }
40 
41  const arm_compute::ActivationLayerInfo activationInfo =
42  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
43 
44  switch (fusedDescriptor.m_FusedKernelType)
45  {
46  case FusedKernelType::AddMulAdd:
47  return arm_compute::NEAddMulAdd::validate(
48  &actInputInfos[0],
49  &actInputInfos[1],
50  &actInputInfos[2], // bn_mul
51  &actInputInfos[3], // bn_add
52  actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output
53  actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output
54  arm_compute::ConvertPolicy::SATURATE,
55  activationInfo);
56  default:
57  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
58  "NeonFusedWorkloadValidate: no valid kernel type"};
59  }
60 }

Referenced by NeonLayerSupport::IsFusedSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonGatherNdWorkloadValidate()

arm_compute::Status NeonGatherNdWorkloadValidate ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo 
)

Validates the Mul, ReduceSum, Gather and Reshape stages that make up GatherNd on CpuAcc, and returns OK only if all of them are valid.

Definition at line 14 of file NeonGatherNdWorkload.cpp.

17 {
18  // Calculate ND, K, W, C.
19  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
20 
21  /// Validate Mul
22  // Indices with shape { W, ND }
23  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
24  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
25  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
26 
27  // Flattened coefficients with shape { ND }
28  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
29  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
30  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
31 
32  // Output of Mul with shape { W, ND }
33  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
34 
35  auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
36  &aclFlattenedCoeffInfo,
37  &aclOutputMulInfo,
38  1.0f,
39  arm_compute::ConvertPolicy::WRAP,
40  arm_compute::RoundingPolicy::TO_ZERO,
41  arm_compute::ActivationLayerInfo());
42 
43  /// Validate ReduceSum
44  // Flattened indices with shape { W }
45  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
46  flattenedIndices_Info.SetShape({ keyIndices["W"] });
47  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
48 
49  const std::vector<unsigned int> armnnReduceAxes(1, 1);
50  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
51  indices_W_ND_Info.GetNumDimensions(),
52  armnnReduceAxes);
53 
54  auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
55  &aclFlattenedIndicesInfo,
56  static_cast<unsigned int>(coords[0]),
57  arm_compute::ReductionOperation::SUM,
58  false);
59 
60  /// Validate Gather
61  // Params with shape { K, C }
62  armnn::TensorInfo params_K_C_Info = paramsInfo;
63  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
64  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
65 
66  // Output of gather with shape { W, C }
67  armnn::TensorInfo outputGather_Info = outputInfo;
68  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
69  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
70 
71  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
72  auto statusGather =
73  arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
74 
75  /// Validate Reshape
76  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
77 
78  auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
79 
80  /// Return OK if all the layers are valid
81  auto okCode = arm_compute::ErrorCode::OK;
82  if (statusMul.error_code() == okCode &&
83  statusReduceSum.error_code() == okCode &&
84  statusGather.error_code() == okCode &&
85  statusReshape.error_code() == okCode)
86  {
87  return arm_compute::Status(arm_compute::ErrorCode::OK,
88  "All GatherND layers validate status OK.");
89  }
90  else
91  {
92  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
93  "GatherND layer validate status failed.");
94  }
95 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by NeonLayerSupport::IsGatherNdSupported().

◆ NeonGatherWorkloadValidate()

arm_compute::Status NeonGatherWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  indices,
const TensorInfo &  output,
const GatherDescriptor &  descriptor 
)

Definition at line 13 of file NeonGatherWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
23 
24  return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
25 }

Referenced by NeonLayerSupport::IsGatherSupported().

◆ NeonInstanceNormalizationWorkloadValidate()

arm_compute::Status NeonInstanceNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const InstanceNormalizationDescriptor &  descriptor 
)

Definition at line 19 of file NeonInstanceNormalizationWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  return arm_compute::NEInstanceNormalizationLayer::validate(&aclInputInfo,
27  &aclOutputInfo,
28  descriptor.m_Gamma,
29  descriptor.m_Beta,
30  descriptor.m_Eps);
31 }

Referenced by NeonLayerSupport::IsInstanceNormalizationSupported().

◆ NeonL2NormalizationWorkloadValidate()

arm_compute::Status NeonL2NormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const L2NormalizationDescriptor &  descriptor 
)

Definition at line 19 of file NeonL2NormalizationFloatWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
27 
28  return arm_compute::NEL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
29 }

Referenced by NeonLayerSupport::IsL2NormalizationSupported().

◆ NeonLogicalAndWorkloadValidate()

arm_compute::Status NeonLogicalAndWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 18 of file NeonLogicalAndWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalAnd::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogicalNotWorkloadValidate()

arm_compute::Status NeonLogicalNotWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file NeonLogicalNotWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::Status aclStatus = arm_compute::NELogicalNot::validate(&aclInputInfo,
26  &aclOutputInfo);
27  return aclStatus;
28 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLogicalOrWorkloadValidate()

arm_compute::Status NeonLogicalOrWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 18 of file NeonLogicalOrWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalOr::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogSoftmaxWorkloadValidate()

arm_compute::Status NeonLogSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const LogSoftmaxDescriptor &  descriptor 
)

Definition at line 19 of file NeonLogSoftmaxWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NELogSoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }

Referenced by NeonLayerSupport::IsLogSoftmaxSupported().

◆ NeonLogWorkloadValidate()

arm_compute::Status NeonLogWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonLogWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NELogLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLstmFloatWorkloadValidate()

arm_compute::Status NeonLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  scratchBuffer,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const LstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 253 of file NeonLstmFloatWorkload.cpp.

262 {
263  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
264 
265  // The inputs and outputs
266  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
267  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
268  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
269  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
270  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
271  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
272  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
273 
274  // Basic parameters
275  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
276  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
277  const arm_compute::TensorInfo aclInputToCellWeightsInfo
278  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
279  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
280  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
281  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
282  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
283  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
284  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
285  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
286  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
287  const arm_compute::TensorInfo aclForgetGateBiasInfo
288  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
289  const arm_compute::TensorInfo aclCellBiasInfo
290  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
291  const arm_compute::TensorInfo aclOutputGateBiasInfo
292  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
293 
294  arm_compute::TensorInfo aclInputToInputWeightsInfo;
295  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
296  arm_compute::TensorInfo aclCellToInputWeightsInfo;
297  arm_compute::TensorInfo aclInputGateBiasInfo;
298  arm_compute::TensorInfo aclProjectionWeightsInfo;
299  arm_compute::TensorInfo aclProjectionBiasInfo;
300  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
301  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
302 
303  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
304  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
305  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
306  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
307 
308 
309  if (!descriptor.m_CifgEnabled)
310  {
311  if (descriptor.m_PeepholeEnabled)
312  {
313  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
314  }
315  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
316  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
317  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
318 
319  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
320  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
321  &aclInputGateBiasInfo);
322  }
323 
324  if (descriptor.m_ProjectionEnabled)
325  {
326  if (paramsInfo.m_ProjectionBias != nullptr)
327  {
328  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
329  }
330  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
331 
332  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ?
334  &aclProjectionBiasInfo : nullptr);
335  }
336 
337  if (descriptor.m_PeepholeEnabled)
338  {
339  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
340  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
341 
342  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
343  }
344 
345  if (descriptor.m_LayerNormEnabled)
346  {
347  if (!descriptor.m_CifgEnabled)
348  {
349  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
350  }
351  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
352  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
353  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
354 
355  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
356  nullptr : &aclInputLayerNormWeightsInfo,
357  &aclForgetLayerNormWeightsInfo,
358  &aclCellLayerNormWeightsInfo,
359  &aclOutputLayerNormWeightsInfo);
360  }
361 
362  float cell_threshold = descriptor.m_ClippingThresCell;
363  float projection_threshold = descriptor.m_ClippingThresProj;
364 
365  // For preparing the ActivationLayerInfo object, we need to consider 5 situations
366  arm_compute::ActivationLayerInfo activationLayerInfo =
367  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
368 
369  return arm_compute::NELSTMLayer::validate(&aclInputInfo,
370  &aclInputToForgetWeightsInfo,
371  &aclInputToCellWeightsInfo,
372  &aclInputToOutputWeightsInfo,
373  &aclRecurrentToForgetWeightsInfo,
374  &aclRecurrentToCellWeightsInfo,
375  &aclRecurrentToOutputWeightsInfo,
376  &aclForgetGateBiasInfo,
377  &aclCellBiasInfo,
378  &aclOutputGateBiasInfo,
379  &aclOutputStateInInfo,
380  &aclCellStateInInfo,
381  &aclScratchBufferInfo,
382  &aclOutputStateOutInfo,
383  &aclCellStateOutInfo,
384  &aclOutputInfo,
385  lstm_params_info,
386  activationLayerInfo,
387  cell_threshold,
388  projection_threshold);
389 }

Referenced by NeonLayerSupport::IsLstmSupported().

◆ NeonMaximumWorkloadValidate()

arm_compute::Status NeonMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 14 of file NeonMaximumWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
19  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEElementwiseMax::validate(&aclInput0,
23  &aclInput1,
24  &aclOutput);
25 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMaximumSupported().

◆ NeonMeanWorkloadValidate()

arm_compute::Status NeonMeanWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 18 of file NeonMeanWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
26  input.GetNumDimensions(),
27  descriptor.m_Axis);
28 
29  return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
30 }

Referenced by NeonLayerSupport::IsMeanSupported().
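An illustrative descriptor (an assumption): the axes are Arm NN dimension indices, e.g. H and W of an NHWC tensor, and are converted to ACL reduction coordinates above before calling NEReduceMean::validate.

    #include <armnn/Descriptors.hpp>

    armnn::MeanDescriptor MakeMeanDescriptor()
    {
        armnn::MeanDescriptor descriptor;
        descriptor.m_Axis     = { 1, 2 };   // reduce over H and W of an NHWC tensor
        descriptor.m_KeepDims = true;       // keep the reduced dimensions as size 1
        return descriptor;
    }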

◆ NeonMinimumWorkloadValidate()

arm_compute::Status NeonMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Validates the inputs and output of the Minimum workload.

Parameters
[in]  input0  The input0 value to be validated.
[in]  input1  The input1 value to be validated.
[in]  output  The output value to be validated.

Definition at line 15 of file NeonMinimumWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
20  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEElementwiseMin::validate(&aclInput0,
24  &aclInput1,
25  &aclOutput);
26 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMinimumSupported().

◆ NeonMultiplicationWorkloadValidate()

arm_compute::Status NeonMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file NeonMultiplicationWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
29  arm_compute::ConvertPolicy::SATURATE :
30  arm_compute::ConvertPolicy::WRAP;
31 
32  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
33  activationDescriptor);
34 
35  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
36  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
37  // ignored for F32 tensors.
38  return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
39  &aclInput2,
40  &aclOutput,
41  1.0f,
42  convertPolicy,
43  arm_compute::RoundingPolicy::TO_ZERO,
44  activationInfo);
45 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsMultiplicationSupported(), and NeonBackend::OptimizeSubgraphView().
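The optional ActivationDescriptor allows a fused activation to be validated together with the multiplication; passing nullptr validates the plain operation. A sketch with a bounded ReLU (an assumption, not from the sources); input0, input1 and output stand for already-built TensorInfo objects.

    #include <armnn/Descriptors.hpp>

    armnn::ActivationDescriptor MakeBoundedReluActivation()
    {
        armnn::ActivationDescriptor activation;
        activation.m_Function = armnn::ActivationFunction::BoundedReLu;
        activation.m_A        = 6.0f;   // upper bound
        activation.m_B        = 0.0f;   // lower bound
        return activation;
        // arm_compute::Status status =
        //     armnn::NeonMultiplicationWorkloadValidate(input0, input1, output, &activation);
    }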

◆ NeonNegWorkloadValidate()

arm_compute::Status NeonNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonNegWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NENegLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonNormalizationWorkloadValidate()

arm_compute::Status NeonNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 49 of file NeonNormalizationFloatWorkload.cpp.

52 {
53  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
54  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
55 
56  arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
57 
58  return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
59 }

Referenced by NeonLayerSupport::IsNormalizationSupported().

◆ NeonPadWorkloadValidate()

arm_compute::Status NeonPadWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 59 of file NeonPadWorkload.cpp.

62 {
63  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
64  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
65 
66  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
67 
68  std::reverse_copy(std::begin(descriptor.m_PadList),
69  std::end(descriptor.m_PadList),
70  std::begin(reversed_PadList));
71 
72  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
73 
74  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
75  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
76  return arm_compute::NEPadLayer::validate(&aclInputInfo,
77  &aclOutputInfo,
78  padList,
79  pixelValue,
80  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
81 }

Referenced by NeonLayerSupport::IsPadSupported().
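An illustrative descriptor (an assumption): Arm NN stores the (before, after) padding per dimension starting from the outermost dimension, which is why the list is reverse-copied above before it is handed to NEPadLayer.

    #include <armnn/Descriptors.hpp>

    armnn::PadDescriptor MakePadDescriptor()
    {
        armnn::PadDescriptor descriptor;
        descriptor.m_PadList     = { { 0, 0 }, { 1, 1 }, { 1, 1 }, { 0, 0 } }; // N, H, W, C: pad H and W by 1
        descriptor.m_PadValue    = 0.0f;
        descriptor.m_PaddingMode = armnn::PaddingMode::Constant;
        return descriptor;
    }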

◆ NeonPermuteWorkloadValidate()

arm_compute::Status NeonPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 15 of file NeonPermuteWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
25 }

Referenced by NeonLayerSupport::IsPermuteSupported().

◆ NeonPooling2dWorkloadValidate()

arm_compute::Status NeonPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 22 of file NeonPooling2dWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInputInfo =
27  BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo =
29  BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
30 
31  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
32 
33  return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
34 }

Referenced by NeonLayerSupport::IsPooling2dSupported().

◆ NeonPooling3dWorkloadValidate()

arm_compute::Status NeonPooling3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling3dDescriptor &  descriptor 
)

Definition at line 15 of file NeonPooling3dWorkload.cpp.

18  {
19  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
22  return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
23  }

Referenced by NeonLayerSupport::IsPooling3dSupported().

◆ NeonPreluWorkloadValidate()

arm_compute::Status NeonPreluWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  alpha,
const TensorInfo &  output 
)

Definition at line 17 of file NeonPreluWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::NEPReluLayer::validate(&aclInput,
26  &aclAlpha,
27  &aclOutput);
28 }

Referenced by NeonLayerSupport::IsPreluSupported().

◆ NeonQLstmWorkloadValidate()

arm_compute::Status NeonQLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const TensorInfo &  output,
const QLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 243 of file NeonQLstmWorkload.cpp.

251 {
252  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
253 
254  // Input/Output tensor info
255  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
256  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
257  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
258 
259  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
260  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
261  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
262 
263  // Mandatory tensor info
264  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
266  const arm_compute::TensorInfo aclInputToCellWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
268  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
270  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
272  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
274  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
276  const arm_compute::TensorInfo aclForgetGateBiasInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
278  const arm_compute::TensorInfo aclCellBiasInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
280  const arm_compute::TensorInfo aclOutputGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
282 
283  // Optional tensor info
284  arm_compute::TensorInfo aclInputToInputWeightsInfo;
285  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
286 
287  arm_compute::TensorInfo aclCellToInputWeightsInfo;
288  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
289  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
290 
291  arm_compute::TensorInfo aclInputGateBiasInfo;
292 
293  arm_compute::TensorInfo aclProjectionWeightsInfo;
294  arm_compute::TensorInfo aclProjectionBiasInfo;
295 
296  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
297  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
298  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
299  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
300 
301  // Create tensor info for optional params if they are enabled
302  if (descriptor.m_PeepholeEnabled)
303  {
304  if (!descriptor.m_CifgEnabled)
305  {
306  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
307  }
308 
309  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
310  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
311 
312  // Set peephole params info
313  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
314  &aclCellToOutputWeightsInfo);
315  }
316 
317  if (descriptor.m_ProjectionEnabled)
318  {
319  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
320 
321  if (paramsInfo.m_ProjectionBias != nullptr)
322  {
323  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
324  }
325 
326  // Set projection params info
327  aclParamsInfo.set_projection_params(
328  &aclProjectionWeightsInfo,
329  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
330  }
331 
332  if (descriptor.m_LayerNormEnabled)
333  {
334  if (!descriptor.m_CifgEnabled)
335  {
336  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
337  }
338 
339  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
340  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
341  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
342 
343  // Set layer norm params info
344  aclParamsInfo.set_layer_normalization_params(
345  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
346  &aclForgetLayerNormWeightsInfo,
347  &aclCellLayerNormWeightsInfo,
348  &aclOutputLayerNormWeightsInfo);
349  }
350 
351  if (!descriptor.m_CifgEnabled)
352  {
353  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
354  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
355  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
356 
357  // Set CIFG params info
358  aclParamsInfo.set_cifg_params(
359  &aclInputToInputWeightsInfo,
360  &aclRecurrentToInputWeightsInfo,
361  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
362  &aclInputGateBiasInfo);
363  }
364 
365  // Set scalar descriptor params
366  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
367  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
368  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
369  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
370  descriptor.m_ForgetIntermediateScale,
371  descriptor.m_CellIntermediateScale,
372  descriptor.m_OutputIntermediateScale);
373 
374  // QLSTM NEON validate
375  return arm_compute::NEQLSTMLayer::validate(&aclInputInfo,
376  &aclInputToForgetWeightsInfo,
377  &aclInputToCellWeightsInfo,
378  &aclInputToOutputWeightsInfo,
379  &aclRecurrentToForgetWeightsInfo,
380  &aclRecurrentToCellWeightsInfo,
381  &aclRecurrentToOutputWeightsInfo,
382  &aclForgetGateBiasInfo,
383  &aclCellBiasInfo,
384  &aclOutputGateBiasInfo,
385  &aclCellStateInInfo,
386  &aclOutputStateInInfo,
387  &aclCellStateOutInfo,
388  &aclOutputStateOutInfo,
389  &aclOutputInfo,
390  aclParamsInfo);
391 }

Referenced by NeonLayerSupport::IsQLstmSupported().
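
Every Neon*WorkloadValidate function in this namespace returns an arm_compute::Status, which the NeonLayerSupport queries inspect before a layer is assigned to the Neon backend. The following is a minimal sketch of consuming such a result; the function and variable names are illustrative, and the Compute Library include path is an assumption:

#include <iostream>
#include <arm_compute/core/Error.h> // arm_compute::Status / ErrorCode (assumed include path)

// Returns true when a Neon*WorkloadValidate result signals success.
bool IsSupportedOnNeon(const arm_compute::Status& status)
{
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // error_description() carries the reason reported by the Compute Library.
        std::cerr << "Validation failed: " << status.error_description() << "\n";
        return false;
    }
    return true;
}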

◆ NeonQuantizedLstmWorkloadValidate()

arm_compute::Status NeonQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo cellStateIn,
const TensorInfo outputStateIn,
const TensorInfo cellStateOut,
const TensorInfo outputStateOut,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 131 of file NeonQuantizedLstmWorkload.cpp.

137 {
138  // The inputs and outputs
139  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
140  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
141  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
142  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
143  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
144 
145  // Basic parameters
146  const arm_compute::TensorInfo aclInputToInputWeightsInfo
147  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
148  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
149  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
150  const arm_compute::TensorInfo aclInputToCellWeightsInfo
151  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
152  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
153  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
154 
155  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
156  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
157  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
158  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
159  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
160  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
161  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
162  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
163 
164  const arm_compute::TensorInfo aclInputGateBiasInfo
165  = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
166  const arm_compute::TensorInfo aclForgetGateBiasInfo
167  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
168  const arm_compute::TensorInfo aclCellBiasInfo
169  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
170  const arm_compute::TensorInfo aclOutputGateBiasInfo
171  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
172 
173  return arm_compute::NELSTMLayerQuantized::validate(&aclInputInfo,
174  &aclInputToInputWeightsInfo,
175  &aclInputToForgetWeightsInfo,
176  &aclInputToCellWeightsInfo,
177  &aclInputToOutputWeightsInfo,
178  &aclRecurrentToInputWeightsInfo,
179  &aclRecurrentToForgetWeightsInfo,
180  &aclRecurrentToCellWeightsInfo,
181  &aclRecurrentToOutputWeightsInfo,
182  &aclInputGateBiasInfo,
183  &aclForgetGateBiasInfo,
184  &aclCellBiasInfo,
185  &aclOutputGateBiasInfo,
186  &aclCellStateInInfo,
187  &aclOutputStateInInfo,
188  &aclCellStateOutInfo,
189  &aclOutputStateOutInfo);
190 }

Referenced by NeonLayerSupport::IsQuantizedLstmSupported().

◆ NeonQuantizeWorkloadValidate()

arm_compute::Status NeonQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file NeonQuantizeWorkload.cpp.

19 {
20  const arm_compute::TensorInfo neonInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEQuantizationLayer::validate(&neonInputInfo, &neonOutputInfo);
24 }

Referenced by NeonLayerSupport::IsQuantizeSupported().
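
As a concrete illustration, a caller might validate a Float32-to-QAsymmU8 quantization as sketched below. The backend-internal include path, tensor shapes and quantization parameters are assumptions for the example, not taken from this page:

#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                    // assumed include path
#include <neon/workloads/NeonQuantizeWorkload.hpp>     // assumed backend-internal include path

bool CanQuantizeOnNeon()
{
    // 1x3x224x224 Float32 activations quantized to 8-bit asymmetric values (scale 0.05, offset 128).
    armnn::TensorInfo input({1, 3, 224, 224}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 3, 224, 224}, armnn::DataType::QAsymmU8, 0.05f, 128);

    return armnn::NeonQuantizeWorkloadValidate(input, output).error_code()
           == arm_compute::ErrorCode::OK;
}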

◆ NeonReduceWorkloadValidate()

arm_compute::Status NeonReduceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor 
)

Definition at line 19 of file NeonReduceWorkload.cpp.

22 {
23  if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
24  {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
29  input.GetNumDimensions(),
30  descriptor.m_vAxis);
31 
32  return arm_compute::NEReductionOperation::validate(&aclInputInfo,
33  &aclOutputInfo,
34  static_cast<unsigned int>(coords[0]),
35  ConvertReductionOperationToAcl(descriptor),
36  descriptor.m_KeepDims);
37  }
38  else
39  {
40  // Validate layer if there are multiple axes.
41  arm_compute::Status status;
42  IsMultiAxesReduceSupported(NeonReduceWorkloadValidate, input, output, descriptor, status);
43  return status;
44  }
45 }

References ReduceDescriptor::m_vAxis.

Referenced by NeonLayerSupport::IsReduceSupported().
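
A hedged usage sketch for this validator, reducing over a single axis with the reduced dimension kept; the include path, shapes and reduction operation are illustrative assumptions:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                  // assumed include path
#include <neon/workloads/NeonReduceWorkload.hpp>     // assumed backend-internal include path

bool CanReduceSumOnNeon()
{
    // Sum over axis 2 of a {2, 3, 4} tensor, keeping the reduced dimension.
    armnn::TensorInfo input({2, 3, 4}, armnn::DataType::Float32);
    armnn::TensorInfo output({2, 3, 1}, armnn::DataType::Float32);

    armnn::ReduceDescriptor descriptor;
    descriptor.m_vAxis           = {2};
    descriptor.m_KeepDims        = true;
    descriptor.m_ReduceOperation = armnn::ReduceOperation::Sum;

    return armnn::NeonReduceWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}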

◆ NeonReshapeWorkloadValidate()

arm_compute::Status NeonReshapeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonReshapeWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
24 }

Referenced by NeonLayerSupport::IsReshapeSupported().

◆ NeonResizeWorkloadValidate()

arm_compute::Status NeonResizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor 
)

Definition at line 22 of file NeonResizeWorkload.cpp.

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  bool usePadding = false;
40 
41  return arm_compute::NEScale::validate(&aclInputInfo,
42  &aclOutputInfo,
43  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
44  arm_compute::BorderMode::REPLICATE,
45  arm_compute::PixelValue(0.f),
46  samplingPolicy,
47  usePadding,
48  descriptor.m_AlignCorners));
49 
50 }

Referenced by NeonLayerSupport::IsResizeSupported().
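
A sketch of validating a bilinear NHWC upscale with this function; the include path and concrete shapes are assumptions chosen for the example:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                  // assumed include path
#include <neon/workloads/NeonResizeWorkload.hpp>     // assumed backend-internal include path

bool CanResizeBilinearOnNeon()
{
    // NHWC {1, 16, 16, 3} input resized to {1, 32, 32, 3}.
    armnn::TensorInfo input({1, 16, 16, 3}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 32, 32, 3}, armnn::DataType::Float32);

    armnn::ResizeDescriptor descriptor;
    descriptor.m_TargetWidth      = 32;
    descriptor.m_TargetHeight     = 32;
    descriptor.m_Method           = armnn::ResizeMethod::Bilinear;
    descriptor.m_DataLayout       = armnn::DataLayout::NHWC;
    descriptor.m_HalfPixelCenters = false;
    descriptor.m_AlignCorners     = false;

    return armnn::NeonResizeWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}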

◆ NeonReverseV2WorkloadValidate()

arm_compute::Status NeonReverseV2WorkloadValidate ( const TensorInfo input,
const TensorInfo axis,
const TensorInfo output 
)

Definition at line 14 of file NeonReverseV2Workload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
23 }

Referenced by NeonLayerSupport::IsReverseV2Supported().

◆ NeonRsqrtWorkloadValidate()

arm_compute::Status NeonRsqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file NeonRsqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NERsqrtLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSinWorkloadValidate()

arm_compute::Status NeonSinWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonSinWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NESinLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSliceWorkloadValidate()

arm_compute::Status NeonSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SliceDescriptor descriptor 
)

Definition at line 21 of file NeonSliceWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30 
31  std::tie(starts, ends) = SetNeonSliceData(descriptor.m_Begin, descriptor.m_Size);
32 
33  return arm_compute::NESlice::validate(&aclInputInfo, &aclOutputInfo, starts, ends);
34 }

Referenced by NeonLayerSupport::IsSliceSupported().
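
The descriptor's m_Begin and m_Size members drive the validation, as shown in this sketch; the include path and the particular window are illustrative assumptions:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                 // assumed include path
#include <neon/workloads/NeonSliceWorkload.hpp>     // assumed backend-internal include path

bool CanSliceOnNeon()
{
    // Take a {1, 2, 2} window of a {2, 3, 4} tensor starting at {0, 1, 2}.
    armnn::TensorInfo input({2, 3, 4}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 2, 2}, armnn::DataType::Float32);

    armnn::SliceDescriptor descriptor;
    descriptor.m_Begin = {0, 1, 2};
    descriptor.m_Size  = {1, 2, 2};

    return armnn::NeonSliceWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}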

◆ NeonSoftmaxWorkloadValidate()

arm_compute::Status NeonSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor 
)

Definition at line 19 of file NeonSoftmaxWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NESoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }

Referenced by NeonLayerSupport::IsSoftmaxSupported().
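
A short sketch of validating a standard softmax over the last dimension; the include path and shapes are assumptions for the example:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                   // assumed include path
#include <neon/workloads/NeonSoftmaxWorkload.hpp>     // assumed backend-internal include path

bool CanSoftmaxOnNeon()
{
    armnn::TensorInfo input({1, 10}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 10}, armnn::DataType::Float32);

    armnn::SoftmaxDescriptor descriptor;
    descriptor.m_Beta = 1.0f; // standard softmax
    descriptor.m_Axis = -1;   // last dimension

    return armnn::NeonSoftmaxWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}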

◆ NeonSpaceToBatchNdWorkloadValidate()

arm_compute::Status NeonSpaceToBatchNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor 
)

Definition at line 15 of file NeonSpaceToBatchNdWorkload.cpp.

18 {
19  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21 
22  arm_compute::Status statusSpaceToBatch = arm_compute::Status(arm_compute::ErrorCode::OK);
23  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
25 
26  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
27  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
28 
29  // When a spatial dimension is missing (rank=3), set W to 1
30  const unsigned int rank = input.GetNumDimensions();
31  if (rank == 3)
32  {
33  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
34  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
35 
36  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
37  {
38  // In ACL dimensions are right to left: C, W, H, N
39  aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
40  aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
41  }
42  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
43  {
44  // In ACL dimensions are right to left: W, H, C, N
45  aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
46  aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
47  }
48  else
49  {
50  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
51  }
52 
53  statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
54  statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
55  }
56 
57  // ArmNN blockShape is [H, W]; ACL asks for W, H
58  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
59  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
60 
61  unsigned int padLeft = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
62  unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
63  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(padLeft,
64  descriptor.m_PadList[0].first);
65  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(padRight,
66  descriptor.m_PadList[0].second);
67 
68  statusSpaceToBatch = arm_compute::NESpaceToBatchLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
69  blockWidth,
70  blockHeight,
71  paddingLeftTop,
72  paddingRightBottom,
73  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo);
74 
75  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
76  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
77  statusSpaceToBatch.error_code() == arm_compute::ErrorCode::OK)
78  {
79  return arm_compute::Status(arm_compute::ErrorCode::OK,
80  "All SpaceToBatch layers validate status OK.");
81  }
82  else
83  {
84  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
85  "SpaceToBatch layer validate status failed."
86  + statusSpaceToBatch.error_description()
87  + statusReshapeInput.error_description()
88  + statusReshapeOutput.error_description());
89  }
90 }

Referenced by NeonLayerSupport::IsSpaceToBatchNdSupported().

◆ NeonSpaceToDepthWorkloadValidate()

arm_compute::Status NeonSpaceToDepthWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor 
)

Definition at line 19 of file NeonSpaceToDepthWorkload.cpp.

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsSpaceToDepthSupported().
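
A sketch of validating a SpaceToDepth rearrangement; the include path and shapes are illustrative assumptions:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                        // assumed include path
#include <neon/workloads/NeonSpaceToDepthWorkload.hpp>     // assumed backend-internal include path

bool CanSpaceToDepthOnNeon()
{
    // Block size 2 halves H and W and multiplies C by 4 (NHWC layout).
    armnn::TensorInfo input({1, 4, 4, 1}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 2, 2, 4}, armnn::DataType::Float32);

    armnn::SpaceToDepthDescriptor descriptor;
    descriptor.m_BlockSize  = 2;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    return armnn::NeonSpaceToDepthWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}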

◆ NeonSplitterWorkloadValidate()

arm_compute::Status NeonSplitterWorkloadValidate ( const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 33 of file NeonSplitterWorkload.cpp.

36 {
37  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
38 
39  size_t numOutputs = outputs.size();
40 
41  std::vector<arm_compute::TensorInfo> aclOutputs;
42  aclOutputs.reserve(numOutputs);
43 
44  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
45  aclOutputPtr.reserve(numOutputs);
46 
47  for (size_t i = 0u; i < outputs.size(); ++i)
48  {
49  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
50  aclOutputPtr.emplace_back(&aclOutputs.back());
51  }
52 
53  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
54  return arm_compute::NESplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
55 }

Referenced by NeonLayerSupport::IsSplitterSupported().
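
The outputs are passed as reference wrappers, as in this sketch of an even two-way split along axis 0; the include path and shapes are assumptions for the example:

#include <functional>
#include <vector>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                    // assumed include path
#include <neon/workloads/NeonSplitterWorkload.hpp>     // assumed backend-internal include path

bool CanSplitOnNeon()
{
    // Split a {4, 8} tensor into two {2, 8} outputs along axis 0.
    armnn::TensorInfo input({4, 8}, armnn::DataType::Float32);
    armnn::TensorInfo out0({2, 8}, armnn::DataType::Float32);
    armnn::TensorInfo out1({2, 8}, armnn::DataType::Float32);

    std::vector<std::reference_wrapper<armnn::TensorInfo>> outputs{std::ref(out0), std::ref(out1)};

    return armnn::NeonSplitterWorkloadValidate(input, outputs, 0).error_code()
           == arm_compute::ErrorCode::OK;
}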

◆ NeonSqrtWorkloadValidate()

arm_compute::Status NeonSqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file NeonSqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  ActivationDescriptor descriptor;
24  descriptor.m_Function = ActivationFunction::Sqrt;
25  const arm_compute::ActivationLayerInfo activationLayerInfo =
26  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
27 
28  return arm_compute::NEActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
29 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonStackWorkloadValidate()

arm_compute::Status NeonStackWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo output,
const StackDescriptor descriptor 
)

Definition at line 27 of file NeonStackWorkload.cpp.

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37 
38  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
45  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
46  return arm_compute::NEStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
47 }

Referenced by NeonLayerSupport::IsStackSupported().

◆ NeonStridedSliceWorkloadValidate()

arm_compute::Status NeonStridedSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor 
)

Definition at line 19 of file NeonStridedSliceWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input,
24  descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output,
26  descriptor.m_DataLayout);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30  arm_compute::Coordinates strides;
31 
32  std::tie(starts, ends, strides) = SetNeonStridedSliceData(descriptor.m_Begin,
33  descriptor.m_End,
34  descriptor.m_Stride);
35 
36  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
37  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
38  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
39  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
40 
41  return arm_compute::NEStridedSlice::validate(&aclInput,
42  &aclOutput,
43  starts,
44  ends,
45  strides,
46  begin_mask,
47  end_mask,
48  shrink_axis_mask);
49 }

Referenced by NeonLayerSupport::IsStridedSliceSupported().

◆ NeonSubtractionWorkloadValidate()

arm_compute::Status NeonSubtractionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 22 of file NeonSubtractionWorkload.cpp.

26 {
27  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
28  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
29  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
35  &aclInput1,
36  &aclOutput,
37  arm_compute::ConvertPolicy::SATURATE,
38  activationInfo);
39 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsSubtractionSupported(), and NeonBackend::OptimizeSubgraphView().
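
A sketch of validating a subtraction with a fused ReLU, assuming the activation argument is the usual optional pointer (pass nullptr to validate a plain subtraction); the include path and shapes are illustrative:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                       // assumed include path
#include <neon/workloads/NeonSubtractionWorkload.hpp>     // assumed backend-internal include path

bool CanSubtractWithReluOnNeon()
{
    armnn::TensorInfo input0({1, 8}, armnn::DataType::Float32);
    armnn::TensorInfo input1({1, 8}, armnn::DataType::Float32);
    armnn::TensorInfo output({1, 8}, armnn::DataType::Float32);

    // Optional fused activation applied to the subtraction result.
    armnn::ActivationDescriptor activation;
    activation.m_Function = armnn::ActivationFunction::ReLu;

    return armnn::NeonSubtractionWorkloadValidate(input0, input1, output, &activation).error_code()
           == arm_compute::ErrorCode::OK;
}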

◆ NeonTensorHandleFactoryId()

constexpr const char* armnn::NeonTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file NeonTensorHandleFactory.hpp.

14 { return "Arm/Neon/TensorHandleFactory"; }

Referenced by NeonTensorHandleFactory::GetIdStatic().

◆ NeonTileWorkloadValidate()

arm_compute::Status NeonTileWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TileDescriptor descriptor 
)

Definition at line 14 of file NeonTileWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
20 
21  std::vector<unsigned int> aclMultiples = descriptor.m_Multiples;
22  std::reverse(aclMultiples.begin(),aclMultiples.end());
23 
24  return arm_compute::NETile::validate(&aclInput, &aclOutput, aclMultiples);
25 }

Referenced by NeonLayerSupport::IsTileSupported().

◆ NeonTransposeConvolution2dWorkloadValidate()

arm_compute::Status NeonTransposeConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 25 of file NeonTransposeConvolution2dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34 
35  arm_compute::TensorInfo aclBiasesInfo;
36  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
37 
38  if (descriptor.m_BiasEnabled)
39  {
41  biases.has_value(),
42  "NeonTransposeConvolution2dWorkload: Bias was enabled in the descriptor but no value was supplied.");
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
48 
49  return arm_compute::NEDeconvolutionLayer::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  layerInfo);
54 }

Referenced by NeonLayerSupport::IsTransposeConvolution2dSupported().

◆ NeonTransposeWorkloadValidate()

arm_compute::Status NeonTransposeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeDescriptor descriptor 
)

Definition at line 15 of file NeonTransposeWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
25 }

Referenced by NeonLayerSupport::IsTransposeSupported().
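
A sketch of validating an NCHW-to-NHWC transpose, assuming Transpose's output-from-input mapping convention (each entry of m_DimMappings names the input dimension an output dimension is taken from); the include path and shapes are assumptions:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>                     // assumed include path
#include <neon/workloads/NeonTransposeWorkload.hpp>     // assumed backend-internal include path

bool CanTransposeNchwToNhwcOnNeon()
{
    armnn::TensorInfo input({1, 3, 32, 32}, armnn::DataType::Float32);   // NCHW
    armnn::TensorInfo output({1, 32, 32, 3}, armnn::DataType::Float32);  // NHWC

    // {0, 2, 3, 1} describes NCHW -> NHWC under the assumed convention.
    armnn::TransposeDescriptor descriptor(armnn::PermutationVector({0, 2, 3, 1}));

    return armnn::NeonTransposeWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}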

◆ NeonUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 510 of file NeonUnidirectionalSequenceLstmFloatWorkload.cpp.

518 {
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = output.GetShape();
521 
522  if (inputLayerShape.GetNumDimensions() != 3)
523  {
524  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
525  "Unidirectional Sequence LSTM layer validate status failed.");
526  }
527 
528  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
529  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
530  unsigned int inputSize = inputLayerShape[2];
531  unsigned int outputSize = outputLayerShape[2];
532 
533  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
534  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
535 
536  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Permute1 status");
538  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Split status");
540  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
541  "LSTM status");
542  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
543  "Concat status");
544  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
545  "Permute2 status");
546 
547  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
548  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
549 
550  //
551  // Permute validate
552  //
553  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
554  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
555  if (!descriptor.m_TimeMajor)
556  {
557  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
558  &aclPermuteOutInfo,
559  arm_compute::PermutationVector(0U, 2U, 1U));
560  }
561 
562  //
563  // Split and Concat Tensors validate
564  //
565  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
566  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
567  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
568  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
569  splitterOutputsTensorInfos.reserve(maxTime);
570  concatInputsTensorInfos.reserve(maxTime);
571  for (unsigned int i = 0; i < maxTime; ++i)
572  {
573  arm_compute::TensorInfo splitter_out;
574  arm_compute::TensorInfo concat_in;
575 
576  auto splitterTensorInfo = TensorInfo(input);
577  auto concatTensorInfo = TensorInfo(output);
578  splitterTensorInfo.SetShape({batchSize, inputSize});
579  concatTensorInfo.SetShape({batchSize, outputSize});
580 
581  arm_compute::TensorInfo aclSplitterTensorInfo
582  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
583  arm_compute::TensorInfo aclConcatTensorInfo
584  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
585 
586  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
587  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
588  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
589  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
590  }
591 
592  //
593  // Split validate
594  //
595  unsigned int numberDimensions = 3;
596  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
597  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
598 
599  if (maxTime != 1) // ACL split does not work with only one element to split.
600  {
601  if (!descriptor.m_TimeMajor)
602  {
603  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
604  splitterOutputsTensorInfosPtr,
605  aclAxisSplit);
606  }
607  else
608  {
609  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
610  }
611  }
612 
613  //
614  // LSTM validate
615  //
616 
617  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
618 
619  unsigned int numUnits = cellStateIn.GetShape()[1];
620  unsigned int scratchBufferFactor = 4;
621 
622  if (descriptor.m_CifgEnabled)
623  {
624  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
625  scratchBufferFactor = 3;
626  }
627 
628  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
629 
630  // The inputs and outputs
631  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
632  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
633  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
634  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
635  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
636 
637  // Basic parameters
638  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
640  const arm_compute::TensorInfo aclInputToCellWeightsInfo
641  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
642  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
643  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
644  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
645  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
646  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
647  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
648  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
649  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
650  const arm_compute::TensorInfo aclForgetGateBiasInfo
651  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
652  const arm_compute::TensorInfo aclCellBiasInfo
653  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
654  const arm_compute::TensorInfo aclOutputGateBiasInfo
655  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
656 
657  arm_compute::TensorInfo aclInputToInputWeightsInfo;
658  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
659  arm_compute::TensorInfo aclCellToInputWeightsInfo;
660  arm_compute::TensorInfo aclInputGateBiasInfo;
661  arm_compute::TensorInfo aclProjectionWeightsInfo;
662  arm_compute::TensorInfo aclProjectionBiasInfo;
663  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
664  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
665 
666  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
667  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
668  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
669  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
670 
671 
672  if (!descriptor.m_CifgEnabled)
673  {
674  if (descriptor.m_PeepholeEnabled)
675  {
676  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
677  }
678  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
679  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
680  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
681 
682  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
683  &aclRecurrentToInputWeightsInfo,
684  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
685  &aclInputGateBiasInfo);
686  }
687 
688  if (descriptor.m_ProjectionEnabled)
689  {
690  if (paramsInfo.m_ProjectionBias != nullptr)
691  {
692  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
693  }
694  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
695 
696  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
697  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
698  }
699 
700  if (descriptor.m_PeepholeEnabled)
701  {
702  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
703  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
704 
705  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
706  }
707 
708  if (descriptor.m_LayerNormEnabled)
709  {
710  if (!descriptor.m_CifgEnabled)
711  {
712  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
713  }
714  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
715  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
716  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
717 
718  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
719  &aclInputLayerNormWeightsInfo,
720  &aclForgetLayerNormWeightsInfo,
721  &aclCellLayerNormWeightsInfo,
722  &aclOutputLayerNormWeightsInfo);
723  }
724 
725  // Needs to be set to a negative threshold to be compatible with ACL
726  float cell_threshold = descriptor.m_ClippingThresCell;
727  float projection_threshold = descriptor.m_ClippingThresProj;
728 
729  arm_compute::ActivationLayerInfo activationLayerInfo =
730  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
731 
732  for (unsigned int i = 0; i != maxTime; ++i)
733  {
734 
735  // Set LSTM input and output ITensors depending on:
736  // input format (timeMajor) & number of LSTM batches (maxTime).
737  arm_compute::ITensorInfo* outputLSTM;
738  arm_compute::ITensorInfo* inputLSTM;
739 
740  // If there is only one LSTM time major batch, we will not concat OR permute.
741  // Set input of LSTM to be first input ITensor.
742  // Set output of LSTM to be final output ITensor.
743  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
744  if (maxTime == 1 && descriptor.m_TimeMajor)
745  {
746  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
747  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
748 
749  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
750  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
751 
752  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
753  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
754 
755  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
756  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
757 
758  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
759  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
760  }
761  // If there is only one LSTM batch major batch, we will not concat, only permute.
762  // Set input of LSTM to be output of initial permute.
763  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
764  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
765  else if (maxTime == 1 && !descriptor.m_TimeMajor)
766  {
767  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
768  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
769  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
770  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
771  inputLSTM = &aclPermuteOutInfo;
772 
773  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
774  }
775  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
776  else
777  {
778  inputLSTM = splitterOutputsTensorInfosPtr[i];
779  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
780  }
781 
782  statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,
783  &aclInputToForgetWeightsInfo,
784  &aclInputToCellWeightsInfo,
785  &aclInputToOutputWeightsInfo,
786  &aclRecurrentToForgetWeightsInfo,
787  &aclRecurrentToCellWeightsInfo,
788  &aclRecurrentToOutputWeightsInfo,
789  &aclForgetGateBiasInfo,
790  &aclCellBiasInfo,
791  &aclOutputGateBiasInfo,
792  &aclOutputStateInInfo,
793  &aclCellStateInInfo,
794  &aclScratchBufferInfo,
795  &aclOutputStateOutInfo,
796  &aclCellStateOutInfo,
797  outputLSTM,
798  lstm_params_info,
799  activationLayerInfo,
800  cell_threshold,
801  projection_threshold);
802 
803  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
804  {
805  break;
806  }
807  }
808 
809  //
810  // Concat validate
811  //
812 
813  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
814  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
815  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
816  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
817 
818  TensorInfo concatOutputTensorInfo = TensorInfo(output);
819  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
820  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
821 
822  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
823  {
824  for (unsigned int i = 0; i < maxTime; ++i)
825  {
826  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
827  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
828  }
829 
830  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
831  if (!descriptor.m_TimeMajor)
832  {
833  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
834  &aclConcatOutputTensorInfo,
835  aclAxisConcat);
836  }
837  else
838  {
839  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
840  &aclOutputInfo,
841  aclAxisConcat);
842  }
843  }
844  // If only one LSTM batch, we do not concat and/or permute.
845  // Must ensure final output info is expanded to correct batch major dimensions.
846  else
847  {
848  if (!descriptor.m_TimeMajor)
849  {
850  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
851  BuildArmComputeTensorShape(shapeExpandBatchMajor));
852  }
853  else
854  {
855  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
856  BuildArmComputeTensorShape(shapeExpandTimeMajor));
857  }
858  }
859 
860  //
861  // Permute validate
862  //
863  if (!descriptor.m_TimeMajor)
864  {
865  // Output now time major. Permute output back to batch major.
866  if (maxTime != 1)
867  {
868  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
869  &aclOutputInfo,
870  arm_compute::PermutationVector(0U, 2U, 1U));
871  }
872  else
873  {
874  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
875  &aclOutputInfo,
876  arm_compute::PermutationVector(0U, 2U, 1U));
877  }
878  }
879 
880  auto okCode = arm_compute::ErrorCode::OK;
881  if (statusPermute1.error_code() == okCode &&
882  statusSplit.error_code() == okCode &&
883  statusLSTM .error_code() == okCode &&
884  statusConcat.error_code() == okCode &&
885  statusPermute2.error_code() == okCode)
886  {
887  return arm_compute::Status(arm_compute::ErrorCode::OK,
888  "All Unidirectional Sequence LSTM layer validate status OK.");
889  }
890  else
891  {
892  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
893  "Unidirectional Sequence LSTM layer validate status failed.");
894  }
895 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NeonUnidirectionalSequenceLstmWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 491 of file NeonUnidirectionalSequenceLstmWorkload.cpp.

499 {
500  TensorShape inputLayerShape = input.GetShape();
501  TensorShape outputLayerShape = output.GetShape();
502 
503  if (inputLayerShape.GetNumDimensions() != 3)
504  {
505  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
506  "Unidirectional Sequence LSTM layer validate status failed.");
507  }
508 
509  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
510  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
511  unsigned int inputSize = inputLayerShape[2];
512  unsigned int outputSize = outputLayerShape[2];
513 
514  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
515  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
516 
517  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
518  "Permute1 status");
519  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
520  "Split status");
521  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
522  "LSTM status");
523  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
524  "Concat status");
525  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
526  "Permute2 status");
527 
528  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
529  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
530 
531  //
532  // Permute validate
533  //
534  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
535  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
536  if (!descriptor.m_TimeMajor)
537  {
538  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
539  &aclPermuteOutInfo,
540  arm_compute::PermutationVector(0U, 2U, 1U));
541  }
542 
543  //
544  // Split and Concat Tensors validate
545  //
546  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
547  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
548  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
549  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
550  splitterOutputsTensorInfos.reserve(maxTime);
551  concatInputsTensorInfos.reserve(maxTime);
552  for (unsigned int i = 0; i < maxTime; ++i)
553  {
554  arm_compute::TensorInfo splitter_out;
555  arm_compute::TensorInfo concat_in;
556 
557  auto splitterTensorInfo = TensorInfo(input);
558  auto concatTensorInfo = TensorInfo(output);
559  splitterTensorInfo.SetShape({batchSize, inputSize});
560  concatTensorInfo.SetShape({batchSize, outputSize});
561 
562  arm_compute::TensorInfo aclSplitterTensorInfo
563  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
564  arm_compute::TensorInfo aclConcatTensorInfo
565  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
566 
567  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
568  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
569  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
570  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
571  }
572 
573  //
574  // Split validate
575  //
576  unsigned int numberDimensions = 3;
577  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
578  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
579 
580  if (maxTime != 1) // ACL split does not work with only one element to split.
581  {
582  if (!descriptor.m_TimeMajor)
583  {
584  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
585  splitterOutputsTensorInfosPtr,
586  aclAxisSplit);
587  } else
588  {
589  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
590  }
591  }
592 
593  //
594  // LSTM validate
595  //
596 
597  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
598 
599  unsigned int numUnits = cellStateIn.GetShape()[1];
600  unsigned int scratchBufferFactor = 4;
601 
602  if (descriptor.m_CifgEnabled)
603  {
604  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
605  scratchBufferFactor = 3;
606  }
607 
608  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
609 
610 
611  lstm_params_info.set_cell_clip_params(descriptor.m_ClippingThresCell);
612  lstm_params_info.set_projection_clip_params(descriptor.m_ClippingThresProj);
613  // The inputs and outputs
614  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
615  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
616  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
617  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
618  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
619 
620  // Basic parameters
621  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
622  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
623  const arm_compute::TensorInfo aclInputToCellWeightsInfo
624  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
625  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
626  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
627  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
628  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
629  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
630  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
631  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
632  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
633  const arm_compute::TensorInfo aclForgetGateBiasInfo
634  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
635  const arm_compute::TensorInfo aclCellBiasInfo
636  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
637  const arm_compute::TensorInfo aclOutputGateBiasInfo
638  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
639 
640  arm_compute::TensorInfo aclInputToInputWeightsInfo;
641  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
642  arm_compute::TensorInfo aclCellToInputWeightsInfo;
643  arm_compute::TensorInfo aclInputGateBiasInfo;
644  arm_compute::TensorInfo aclProjectionWeightsInfo;
645  arm_compute::TensorInfo aclProjectionBiasInfo;
646  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
647  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
648 
649  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
650  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
651  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
652  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
653 
654  if (!descriptor.m_CifgEnabled)
655  {
656  if (descriptor.m_PeepholeEnabled)
657  {
658  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
659  }
660  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
661  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
662  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
663 
664  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
665  &aclRecurrentToInputWeightsInfo,
666  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
667  &aclInputGateBiasInfo);
668  }
669 
670  if (descriptor.m_ProjectionEnabled)
671  {
672  if (paramsInfo.m_ProjectionBias != nullptr)
673  {
674  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
675  }
676  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
677 
678  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
679  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
680  }
681 
682  if (descriptor.m_PeepholeEnabled)
683  {
684  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
685  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
686 
687  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
688  }
689 
690  if (descriptor.m_LayerNormEnabled)
691  {
692  if (!descriptor.m_CifgEnabled)
693  {
694  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
695  }
696  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
697  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
698  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
699 
700  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
701  &aclInputLayerNormWeightsInfo,
702  &aclForgetLayerNormWeightsInfo,
703  &aclCellLayerNormWeightsInfo,
704  &aclOutputLayerNormWeightsInfo);
705  }
706 
707  lstm_params_info.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
708  descriptor.m_ForgetIntermediateScale,
709  descriptor.m_CellIntermediateScale,
710  descriptor.m_OutputIntermediateScale);
711 
712  lstm_params_info.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
713 
714  for (unsigned int i = 0; i != maxTime; ++i)
715  {
716 
717  // Set LSTM input and output ITensors depending on:
718  // input format (timeMajor) & number of LSTM batches (maxTime).
719  arm_compute::ITensorInfo* outputLSTM;
720  arm_compute::ITensorInfo* inputLSTM;
721 
722  // If there is only one LSTM time major batch, we will not concat OR permute.
723  // Set input of LSTM to be first input ITensor.
724  // Set output of LSTM to be final output ITensor.
725  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
726  if (maxTime == 1 && descriptor.m_TimeMajor)
727  {
728  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
729  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
730 
731  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
732  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
733 
734  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
735  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
736 
737  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
738  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
739 
740  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
741  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
742  }
743  // If there is only one LSTM batch major batch, we will not concat, only permute.
744  // Set input of LSTM to be output of initial permute.
745  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
746  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
747  else if (maxTime == 1 && !descriptor.m_TimeMajor)
748  {
749  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
750  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
751  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
752  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
753  inputLSTM = &aclPermuteOutInfo;
754 
755  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
756  }
757  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
758  else
759  {
760  inputLSTM = splitterOutputsTensorInfosPtr[i];
761  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
762  }
763 
764  statusLSTM = arm_compute::NEQLSTMLayer::validate(inputLSTM,
765  &aclInputToForgetWeightsInfo,
766  &aclInputToCellWeightsInfo,
767  &aclInputToOutputWeightsInfo,
768  &aclRecurrentToForgetWeightsInfo,
769  &aclRecurrentToCellWeightsInfo,
770  &aclRecurrentToOutputWeightsInfo,
771  &aclForgetGateBiasInfo,
772  &aclCellBiasInfo,
773  &aclOutputGateBiasInfo,
774  &aclCellStateInInfo,
775  &aclOutputStateInInfo,
776  &aclCellStateOutInfo,
777  &aclOutputStateOutInfo,
778  outputLSTM,
779  lstm_params_info);
780  }
781 
782  //
783  // Concat validate
784  //
785 
786  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
787  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
788  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
789  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
790 
791  TensorInfo concatOutputTensorInfo = TensorInfo(output);
792  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
793  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
794 
795  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
796  {
797  for (unsigned int i = 0; i < maxTime; ++i)
798  {
799  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
800  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
801  }
802 
803  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
804  if (!descriptor.m_TimeMajor)
805  {
806  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
807  &aclConcatOutputTensorInfo,
808  aclAxisConcat);
809  }
810  else
811  {
812  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
813  &aclOutputInfo,
814  aclAxisConcat);
815  }
816  }
817  // If only one LSTM batch, we do not concat and/or permute.
818  // Must ensure final output info is expanded to correct batch major dimensions.
819  else
820  {
821  if (!descriptor.m_TimeMajor)
822  {
823  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
824  BuildArmComputeTensorShape(shapeExpandBatchMajor));
825  }
826  else
827  {
828  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
829  BuildArmComputeTensorShape(shapeExpandTimeMajor));
830  }
831  }
832 
833  //
834  // Permute validate
835  //
836  if (!descriptor.m_TimeMajor)
837  {
838  // Output now time major. Permute output back to batch major.
839  if (maxTime != 1)
840  {
841  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
842  &aclOutputInfo,
843  arm_compute::PermutationVector(0U, 2U, 1U));
844  }
845  else
846  {
847  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
848  &aclOutputInfo,
849  arm_compute::PermutationVector(0U, 2U, 1U));
850  }
851  }
852 
853  auto okCode = arm_compute::ErrorCode::OK;
854  if (statusPermute1.error_code() == okCode &&
855  statusSplit.error_code() == okCode &&
856  statusLSTM .error_code() == okCode &&
857  statusConcat.error_code() == okCode &&
858  statusPermute2.error_code() == okCode)
859  {
860  return arm_compute::Status(arm_compute::ErrorCode::OK,
861  "All Unidirectional Sequence LSTM layer validate status OK.");
862  }
863  else
864  {
865  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
866  "Unidirectional Sequence LSTM layer validate status failed.");
867  }
868 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NextIndex()

bool armnn::NextIndex ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  current 
)

Definition at line 19 of file Reduce.cpp.

20 {
21  unsigned int carry = 1;
22 
23  for (unsigned int idx = numDims; idx-- > 0; )
24  {
25  unsigned int current_val = current[idx] + carry;
26  if (dims[idx] == current_val)
27  {
28  current[idx] = 0;
29  }
30  else
31  {
32  current[idx] = current_val;
33  carry = 0;
34  break;
35  }
36  }
37  return (carry == 0);
38 }

Referenced by Reduce().
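NextIndex is an internal helper of the reference Reduce workload, so the snippet below is an illustrative sketch only (it assumes the declaration from Reduce.cpp is visible); it shows the odometer-style iteration the function implements, starting from an all-zero index.

    // Illustrative sketch: visit every index of a 2x3 shape in row-major order.
    armnn::TensorShape dims({2, 3});
    std::vector<unsigned int> current(2, 0);        // start at {0, 0}
    do
    {
        // use "current" here: {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {1,2}
    } while (armnn::NextIndex(2, dims, current));   // returns false once the index wraps past {1, 2}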

◆ NonMaxSuppression()

std::vector< unsigned int > NonMaxSuppression ( unsigned int  numBoxes,
const std::vector< float > &  boxCorners,
const std::vector< float > &  scores,
float  nmsScoreThreshold,
unsigned int  maxDetection,
float  nmsIouThreshold 
)

Definition at line 48 of file DetectionPostProcess.cpp.

54 {
55  // Select boxes that have scores above a given threshold.
56  std::vector<float> scoresAboveThreshold;
57  std::vector<unsigned int> indicesAboveThreshold;
58  for (unsigned int i = 0; i < numBoxes; ++i)
59  {
60  if (scores[i] >= nmsScoreThreshold)
61  {
62  scoresAboveThreshold.push_back(scores[i]);
63  indicesAboveThreshold.push_back(i);
64  }
65  }
66 
67  // Sort the indices based on scores.
68  unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
69  std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
70  TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
71 
72  // Number of output cannot be more than max detections specified in the option.
73  unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
74  std::vector<unsigned int> outputIndices;
75  std::vector<bool> visited(numAboveThreshold, false);
76 
77  // Prune out the boxes with high intersection over union by keeping the box with higher score.
78  for (unsigned int i = 0; i < numAboveThreshold; ++i)
79  {
80  if (outputIndices.size() >= numOutput)
81  {
82  break;
83  }
84  if (!visited[sortedIndices[i]])
85  {
86  outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
87  for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
88  {
89  unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
90  unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
91  if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
92  {
93  visited[sortedIndices[j]] = true;
94  }
95  }
96  }
97  }
98  return outputIndices;
99 }

References GenerateRangeK(), IntersectionOverUnion(), and TopKSort().

Referenced by DetectionPostProcess().
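A hypothetical sketch of the pruning behaviour, with box corners in the ymin, xmin, ymax, xmax order used by the detection post-process workload (the numbers are invented for illustration):

    std::vector<float> boxCorners = { 0.0f, 0.0f, 1.0f, 1.0f,      // box 0
                                      0.0f, 0.1f, 1.0f, 1.1f,      // box 1: IoU with box 0 is roughly 0.82
                                      2.0f, 2.0f, 3.0f, 3.0f };    // box 2: disjoint from the others
    std::vector<float> scores = { 0.9f, 0.8f, 0.7f };

    std::vector<unsigned int> kept =
        armnn::NonMaxSuppression(3, boxCorners, scores,
                                 0.5f /*nmsScoreThreshold*/, 10 /*maxDetection*/, 0.5f /*nmsIouThreshold*/);
    // kept == {0, 2}: box 1 is suppressed because its IoU with the higher-scoring box 0 exceeds 0.5.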

◆ numeric_cast() [1/9]

std::enable_if_t< std::is_unsigned<Source>::value && std::is_unsigned<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 35 of file NumericCast.hpp.

36 {
37 #if ENABLE_NUMERIC_CAST_CHECKS
38  if (source > std::numeric_limits<Dest>::max())
39  {
40  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to "
41  "narrower unsigned type. Overflow detected.");
42  }
43 #endif // ENABLE_NUMERIC_CAST_CHECKS
44 
45  return static_cast<Dest>(source);
46 }

References ARMNN_NUMERIC_CAST_CHECK.
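A short usage sketch for this unsigned-to-unsigned overload (declared in armnn/utility/NumericCast.hpp); the range check only fires when ENABLE_NUMERIC_CAST_CHECKS is defined:

    #include <armnn/utility/NumericCast.hpp>

    uint32_t fits     = 200;
    uint8_t  narrowed = armnn::numeric_cast<uint8_t>(fits);   // ok: 200 fits into uint8_t

    // uint32_t tooBig = 300;
    // armnn::numeric_cast<uint8_t>(tooBig);                  // would fail the overflow check when checks are enabled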

◆ numeric_cast() [2/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 58 of file NumericCast.hpp.

59 {
60 #if ENABLE_NUMERIC_CAST_CHECKS
61  if (source > std::numeric_limits<Dest>::max())
62  {
63  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
64  "Overflow detected.");
65  }
66 
67  if (source < std::numeric_limits<Dest>::lowest())
68  {
69  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
70  "Underflow detected.");
71  }
72 #endif // ENABLE_NUMERIC_CAST_CHECKS
73 
74  return static_cast<Dest>(source);
75 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [3/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 83 of file NumericCast.hpp.

84 {
85 #if ENABLE_NUMERIC_CAST_CHECKS
86  if (source > std::numeric_limits<Dest>::max())
87  {
88  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
89  "Overflow detected.");
90  }
91 
92  if (source < std::numeric_limits<Dest>::lowest())
93  {
94  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
95  "Underflow detected.");
96  }
97 #endif // ENABLE_NUMERIC_CAST_CHECKS
98 
99  return static_cast<Dest>(source);
100 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [4/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 109 of file NumericCast.hpp.

110 {
111 #if ENABLE_NUMERIC_CAST_CHECKS
112  if (source > static_cast<Source>(std::numeric_limits<Dest>::max()))
113  {
114  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
115  "Overflow detected.");
116  }
117 
118  if (source < static_cast<Source>(std::numeric_limits<Dest>::lowest()))
119  {
120  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
121  "Underflow detected.");
122  }
123 #endif // ENABLE_NUMERIC_CAST_CHECKS
124 
125  return static_cast<Dest>(source);
126 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [5/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 135 of file NumericCast.hpp.

136 {
137 #if ENABLE_NUMERIC_CAST_CHECKS
138  Dest sourceConverted = static_cast<Dest>(source);
139 
140  if (sourceConverted > std::numeric_limits<Dest>::max())
141  {
142  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
143  "Overflow detected.");
144  }
145 
146  if (sourceConverted < std::numeric_limits<Dest>::lowest())
147  {
148  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
149  "Underflow detected.");
150  }
151 #endif // ENABLE_NUMERIC_CAST_CHECKS
152 
153  return static_cast<Dest>(source);
154 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [6/9]

std::enable_if_t< std::is_signed<Dest>::value && std::is_integral<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 165 of file NumericCast.hpp.

166 {
167 #if ENABLE_NUMERIC_CAST_CHECKS
168  if (sValue > static_cast< typename std::make_unsigned<Dest>::type >(std::numeric_limits<Dest>::max()))
169  {
170  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to signed type. "
171  "Overflow detected.");
172  }
173 #endif // ENABLE_NUMERIC_CAST_CHECKS
174 
175  return static_cast<Dest>(sValue);
176 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [7/9]

std::enable_if_t< std::is_floating_point<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 184 of file NumericCast.hpp.

185 {
186 #if ENABLE_NUMERIC_CAST_CHECKS
187  if (static_cast<Dest>(sValue) > std::numeric_limits<Dest>::max())
188  {
189  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to floating point type. "
190  "Overflow detected.");
191  }
192 #endif // ENABLE_NUMERIC_CAST_CHECKS
193 
194  return static_cast<Dest>(sValue);
195 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [8/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_signed<Source>::value && std::is_integral<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 206 of file NumericCast.hpp.

207 {
208 #if ENABLE_NUMERIC_CAST_CHECKS
209  if (sValue < 0)
210  {
211  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
212  "Underflow detected.");
213  }
214 
215  if (static_cast< typename std::make_unsigned<Source>::type >(sValue) > std::numeric_limits<Dest>::max())
216  {
217  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to unsigned type. "
218  "Overflow detected.");
219  }
220 #endif // ENABLE_NUMERIC_CAST_CHECKS
221  return static_cast<Dest>(sValue);
222 }

References ARMNN_NUMERIC_CAST_CHECK.
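For this signed-integral-to-unsigned overload both failure modes are checked; a sketch:

    int positive = 17;
    unsigned int ok = armnn::numeric_cast<unsigned int>(positive);   // ok

    // int negative = -1;
    // armnn::numeric_cast<unsigned int>(negative);                  // would fail the underflow check when checks are enabled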

◆ numeric_cast() [9/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_floating_point<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 230 of file NumericCast.hpp.

231 {
232 #if ENABLE_NUMERIC_CAST_CHECKS
233  if (sValue < 0)
234  {
235  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
236  "Underflow detected.");
237  }
238 
239  if (sValue > static_cast<Source>(std::numeric_limits<Dest>::max()))
240  {
241  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to unsigned type. "
242  "Overflow detected.");
243  }
244 #endif // ENABLE_NUMERIC_CAST_CHECKS
245  return static_cast<Dest>(sValue);
246 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ Offset()

unsigned int armnn::Offset ( const TensorShape &  shape,
unsigned int  batch,
unsigned int  height,
unsigned int  width,
unsigned int  channels,
const DataLayoutIndexed &  dataLayout 
)

Definition at line 15 of file BatchToSpaceNd.cpp.

21 {
22  // 3D Tensors
23  unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
24  if (shape.GetNumDimensions() == 3)
25  {
26  return (batch * shape[dataLayout.GetHeightIndex()] + height) * shape[channelDimension3D] + channels;
27  }
28  // 4D Tensors
29  else if (shape.GetNumDimensions() == 4)
30  {
31  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
32  {
33  return ((batch * shape[dataLayout.GetHeightIndex()] + height) *
34  shape[dataLayout.GetWidthIndex()] + width) *
35  shape[dataLayout.GetChannelsIndex()] + channels;
36  }
37  else
38  {
39  return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
40  shape[dataLayout.GetHeightIndex()] + height) *
41  shape[dataLayout.GetWidthIndex()] + width;
42  }
43  }
44  else
45  {
46  throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
47  }
48 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by BatchToSpaceNd().
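Offset() is a file-local helper of the reference BatchToSpaceNd workload, so a worked example of the 4D NHWC index arithmetic (with invented values) is the most useful illustration:

    // NHWC, shape = [N=2, H=4, W=4, C=3]
    // Offset(shape, batch=1, height=2, width=3, channels=1, dataLayout=NHWC)
    //   = ((1 * 4 + 2) * 4 + 3) * 3 + 1
    //   = 27 * 3 + 1
    //   = 82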

◆ operator<<() [1/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const armnn::TensorShape &  shape 
)
inline

Definition at line 335 of file TypesUtils.hpp.

336 {
337  os << "[";
338  if (shape.GetDimensionality() != Dimensionality::NotSpecified)
339  {
340  for (uint32_t i = 0; i < shape.GetNumDimensions(); ++i)
341  {
342  if (i != 0)
343  {
344  os << ",";
345  }
346  if (shape.GetDimensionSpecificity(i))
347  {
348  os << shape[i];
349  }
350  else
351  {
352  os << "?";
353  }
354  }
355  }
356  else
357  {
358  os << "Dimensionality Not Specified";
359  }
360  os << "]";
361  return os;
362 }

References TensorShape::GetDimensionality(), TensorShape::GetDimensionSpecificity(), TensorShape::GetNumDimensions(), and NotSpecified.
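A small usage sketch (the operator lives in TypesUtils.hpp, TensorShape in Tensor.hpp):

    #include <armnn/Tensor.hpp>
    #include <armnn/TypesUtils.hpp>
    #include <iostream>

    armnn::TensorShape shape({1, 3, 224, 224});
    std::cout << shape << std::endl;   // prints "[1,3,224,224]"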

◆ operator<<() [2/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendId &  id 
)
inline

Definition at line 176 of file BackendId.hpp.

177 {
178  os << id.Get();
179  return os;
180 }

◆ operator<<() [3/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendVersion &  backendVersion 
)
inline

Definition at line 70 of file IBackendInternal.hpp.

71 {
72  os << "[" << backendVersion.m_Major << "." << backendVersion.m_Minor << "]";
73 
74  return os;
75 }

References BackendVersion::m_Major, and BackendVersion::m_Minor.

◆ operator<<() [4/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BFloat16 &  b 
)
inline

Definition at line 122 of file BFloat16.hpp.

123 {
124  os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
125  return os;
126 }

References BFloat16::ToFloat32(), and BFloat16::Val().

◆ operator<<() [5/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const Compute &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 69 of file BackendId.hpp.

70 {
71  os << GetComputeDeviceAsCString(compute);
72  return os;
73 }

References GetComputeDeviceAsCString().

◆ operator<<() [6/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::set< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 58 of file BackendId.hpp.

59 {
60  for (const Compute& comp : compute)
61  {
62  os << GetComputeDeviceAsCString(comp) << " ";
63  }
64  return os;
65 }

References GetComputeDeviceAsCString().

◆ operator<<() [7/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::vector< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 47 of file BackendId.hpp.

48 {
49  for (const Compute& comp : compute)
50  {
51  os << GetComputeDeviceAsCString(comp) << " ";
52  }
53  return os;
54 }

References GetComputeDeviceAsCString().

◆ operator<<() [8/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const TContainer< BackendId, TContainerTemplateArgs... > &  ids 
)

Definition at line 183 of file BackendId.hpp.

185 {
186  os << '[';
187  for (const auto& id : ids) { os << id << " "; }
188  os << ']';
189  return os;
190 }

◆ operator<<() [9/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
Status  stat 
)
inline

Definition at line 328 of file TypesUtils.hpp.

329 {
330  os << GetStatusAsCString(stat);
331  return os;
332 }

References GetStatusAsCString().

◆ Optimize() [1/4]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Accept legacy OptimizerOptions.

Definition at line 1893 of file Network.cpp.

1898 {
1899  return Optimize(inGraph,
1900  backendPreferences,
1901  deviceSpec,
1902  OptimizerOptionsOpaque(options),
1903  messages);
1904 }

References Optimize().

◆ Optimize() [2/4]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptionsOpaque &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
    inGraph             Graph to be optimized.
    backendPreferences  The choice of the backend, ordered by user preference.
    deviceSpec          DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec().
    messages            If there are failures or warnings, a string describing them will be added to the vector.
    options             OptimizerOptions object with optimizer configuration options.

Returns
    An IOptimizedNetworkPtr interface to the optimized network; throws an exception derived from armnn::Exception if the process fails.

Definition at line 1906 of file Network.cpp.

1911 {
1912  ARMNN_LOG(debug) << options.ToString();
1913 
1914  // Enable profiling
1915  auto profiler = inGraph.GetProfiler();
1916  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
1917  profiler->EnableProfiling(options.GetProfilingEnabled());
1918 
1919  // Some backends don't play well together. Check here before continuing.
1920  {
1921  std::set<BackendId> backendSet(backendPreferences.begin(), backendPreferences.end());
1922  // GpuFsa cannot co-exist with GpuAcc.
1923  if (backendSet.find("GpuFsa") != backendSet.end() &&
1924  backendSet.find("GpuAcc") != backendSet.end())
1925  {
1926  throw InvalidArgumentException("The backends \"GpuAcc\" and \"GpuFsa\" cannot be specified "
1927  "for the same optimized network.");
1928  }
1929  }
1930 
1931  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
1932  if (backendPreferences.empty())
1933  {
1934  throw InvalidArgumentException("Invoked Optimize with no backends specified");
1935  }
1936 
1937  if (options.GetReduceFp32ToBf16())
1938  {
1939  throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
1940  "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
1941  }
1942 
1943  if (options.GetReduceFp32ToFp16() && options.GetReduceFp32ToBf16())
1944  {
1945  throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1946  }
1947 
1948  // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
1949  inGraph.VerifyConstantLayerSetTensorInfo();
1950 
1951  std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
1952 
1953  // We need to pass on the information about whether import and export is enabled to the LoadNetwork phase.
1954  // The mechanism to do that is to add model options to the optimized network.
1955  armnn::BackendOptions importExport("Global",
1956  {{"ImportEnabled", options.GetImportEnabled()},
1957  {"ExportEnabled", options.GetExportEnabled()}});
1958  ModelOptions optimizedOptions(options.GetModelOptions());
1959  optimizedOptions.push_back(importExport);
1960 
1961  auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
1962  &IOptimizedNetwork::Destroy);
1963 
1964  IOptimizedNetwork* optNetObjPtr = optNet.get();
1965 
1966  // Get the optimized graph
1967  Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
1968 
1969  if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::InferAndValidate)
1970  {
1971  // Infer the tensor infos for all output slots. Throws an exception on failure
1972  optGraph.InferTensorInfos();
1973  }
1974 
1975  using namespace optimizations;
1976  // Substitute Max + Min with Bounded Relu before AddBroadcastReshapeLayer optimisation,
1977  // as Bounded ReLu needs the constants to be 1D size 1
1978  Optimizer::Pass(optGraph, MakeOptimizations(MaxMinIntoBoundedRelu()));
1979 
1980  // Perform BroadcastToOptimizationLayer before AddBroadcastReshapeLayer optimisation
1981  Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
1982 
1983  Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
1984 
1985  if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::ValidateOnly)
1986  {
1987  // Validate the tensor infos for all output slots. Throws an exception on failure
1988  optGraph.InferTensorInfos();
1989  }
1990 
1991  // Group Constant Layer optimizations together where possible.
1992  // This is important as:
1993  // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
1994  // FuseBatchNormIntoDepthwiseConvolution2D.
1995  // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d
 1996  Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
  ...                                              /* remaining constant-layer optimizations elided from this listing */ ));
 1998  // Perform optimisation passes
 1999  Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
  ...
 2004  MovePermuteUp(),
 2005  MoveTransposeUp(),
 2006  PermuteAsReshape(),
  ...                                              /* remaining optimisation passes elided from this listing */ ));
 2019 
2020  // Initialize backend settings
2021  BackendSettings backendSettings(backendPreferences, deviceSpec);
2022  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
2023  if (availablePreferredBackends.empty())
2024  {
2025  std::stringstream failureMsg;
2026  failureMsg << "None of the preferred backends " << backendPreferences
2027  << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
2028  ReportError(failureMsg.str(), messages);
2029  throw InvalidArgumentException(failureMsg.str());
2030  }
2031 
2032  // Create a map to temporarily hold initialized backend objects
2033  TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
2034  BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
2035 
2036  if (options.GetReduceFp32ToFp16())
2037  {
2038  bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
2039  if (hasFp16)
2040  {
2041  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
2042  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
2043  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2044  }
2045  }
2046 
2047  // Assign an available backend to each layer
2048  Graph::Iterator firstLayer = optGraph.begin();
2049  Graph::Iterator lastLayer = optGraph.end();
2050  OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
2051  backendSettings,
2052  firstLayer,
2053  lastLayer,
2054  messages);
2055  if (assignBackendsResult.m_Error)
2056  {
2057  // Failed to assign a backend to each layer
2058  throw InvalidArgumentException("Failed to assign a backend to each layer");
2059  }
2060 
 2061  Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
 2062  OptimizeInverseConversionsFp32()));
 2063 
2064  // Apply the backend-specific optimizations
2065  OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
2066  backendSettings,
2067  backends,
2068  options.GetModelOptions(),
2069  messages);
2070  if (backendOptimizationResult.m_Error)
2071  {
2072  // Failed to apply the backend-specific optimizations
2073  throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
2074  }
2075 
2076  // Convert constants
2077  {
2078  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
2079  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2080  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
2081  }
2082 
2083  // This must occur after all topological changes to the graph and any redirection of variables
2084  // If the debug flag is set, then insert a DebugLayer after each layer
2085  // Doing this after applying the backend optimizations as they might have changed some layers
2086  if (options.GetDebugEnabled() && !options.GetDebugToFileEnabled())
2087  {
2088  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
2089  }
2090  else if (options.GetDebugToFileEnabled())
2091  {
2092  // Setup the output file path
2093  try
2094  {
2095 #if !defined(ARMNN_DISABLE_FILESYSTEM)
2096  auto result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
2097  ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
2098 #endif
2099  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
2100  }
2101  catch (const armnn::RuntimeException& e)
2102  {
2103  // If we cannot create the output directory then we'll issue a warning and continue.
2104  ARMNN_LOG(warning) << "Unable to print intermediate layer outputs : " << e.what();
2105  }
2106  }
2107 
2108  // Calculate the compatibility strategies for tensor handles
2109  OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
2110  backends,
2111  tensorHandleFactoryRegistry,
2112  options.GetImportEnabled(),
2113  options.GetExportEnabled(),
2114  messages);
2115 
2116  if (strategyResult.m_Error)
2117  {
2118  // Failed to apply the backend-specific optimizations
2119  return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
2120  }
2121 
2122  // Based on the tensor handle strategy determined above, insert copy layers where required.
2123  {
2124  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
2125  optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
2126  }
2127 
2128  return optNet;
2129 }

References Graph::AddCompatibilityLayers(), ApplyBackendOptimizations(), ARMNN_LOG, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), Graph::begin(), CheckFp16Support(), armnnUtils::Filesystem::CreateDirectory(), CreateSupportedBackends(), debug, Graph::end(), BackendSettings::GetAvailablePreferredBackends(), OptimizerOptionsOpaque::GetDebugEnabled(), OptimizerOptionsOpaque::GetDebugToFileEnabled(), OptimizerOptionsOpaque::GetExportEnabled(), OptimizerOptionsOpaque::GetImportEnabled(), OptimizerOptionsOpaque::GetModelOptions(), Graph::GetProfiler(), OptimizerOptionsOpaque::GetProfilingEnabled(), OptimizerOptionsOpaque::GetReduceFp32ToBf16(), OptimizerOptionsOpaque::GetReduceFp32ToFp16(), OptimizerOptionsOpaque::GetShapeInferenceMethod(), Graph::InferTensorInfos(), info, OptimizationResult::m_Error, BackendSettings::m_SupportedBackends, MakeOptimizations(), IOptimizedNetwork::pOptimizedNetworkImpl, ReportError(), SelectTensorHandleStrategy(), OptimizerOptionsOpaque::ToString(), Graph::VerifyConstantLayerSetTensorInfo(), warning, and Exception::what().

◆ Optimize() [3/4]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Accept legacy OptimizerOptions.

Definition at line 2132 of file Network.cpp.

2137 {
2138  return Optimize(inNetwork,
2139  backendPreferences,
2140  deviceSpec,
2141  OptimizerOptionsOpaque(options),
2142  messages);
2143 }

References Optimize().

◆ Optimize() [4/4]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptionsOpaque &  options = OptimizerOptionsOpaque(),
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
    network             INetwork description of the network to be optimized.
    backendPreferences  The choice of the backend, ordered by user preference.
    deviceSpec          DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec().
    messages            If there are failures or warnings, a string describing them will be added to the vector.
    options             OptimizerOptions object with optimizer configuration options.

Returns
    An IOptimizedNetworkPtr interface to the optimized network; throws an exception derived from armnn::Exception if the process fails.
Examples
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, DynamicSample.cpp, and SimpleSample.cpp.

Definition at line 2145 of file Network.cpp.

2150 {
2151  return Optimize(inNetwork.pNetworkImpl->GetGraph(),
2152  backendPreferences,
2153  deviceSpec,
2154  options,
2155  messages);
2156 }

References INetwork::pNetworkImpl.

Referenced by Optimize(), ArmnnDriverImpl::PrepareArmnnModel(), ArmnnDriverImpl::PrepareArmnnModelFromCache(), ParserPrototxtFixture< TParser >::Setup(), and ParserPrototxtFixture< TParser >::SetupOptimizedNetwork().
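A minimal usage sketch, assuming an INetworkPtr named network has already been built and that the listed backends are available on the platform; error handling is omitted:

    using namespace armnn;

    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    std::vector<BackendId> backendPreferences = { Compute::CpuAcc, Compute::CpuRef };
    std::vector<std::string> messages;

    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           backendPreferences,
                                           runtime->GetDeviceSpec(),
                                           OptimizerOptionsOpaque(),
                                           Optional<std::vector<std::string>&>(messages));

    NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));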

◆ Pad()

void Pad ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const ITensorHandle *  inputHandle,
ITensorHandle *  outputHandle,
const PadQueueDescriptor &  data 
)

Definition at line 39 of file Pad.cpp.

44 {
45  auto padList = data.m_Parameters.m_PadList;
46  auto padValue = data.m_Parameters.m_PadValue;
47 
48  unsigned int numOutputElements = outputInfo.GetNumElements();
49 
50  TensorShape outputShape = outputInfo.GetShape();
51  TensorShape inputShape = inputInfo.GetShape();
52 
53  unsigned int numInputDimensions = inputShape.GetNumDimensions();
54 
55 #ifndef NDEBUG
56 
57  unsigned int numOutputDimensions = outputShape.GetNumDimensions();
58  assert(numInputDimensions == numOutputDimensions);
59 
60 #endif
61 
62  unsigned int inputBatches = 0;
63  unsigned int inputChannels = 0;
64  unsigned int inputHeight = 0;
65  unsigned int inputWidth = 0;
66  unsigned int inputDim5 = 0;
67 
68  unsigned int outputBatches = 0;
69  unsigned int outputChannels = 0;
70  unsigned int outputHeight = 0;
71  unsigned int outputWidth = 0;
72 
73  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
74  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
75 
76  // Fill the output tensor with Pad value first
77  if (outputInfo.IsQuantized())
78  {
79  // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
80  auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
81 
82  auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
83  FillOutputWithPadValue(*outputData, padValue, numOutputElements);
84  }
85  else
86  {
87  FillOutputWithPadValue(*outData, padValue, numOutputElements);
88  }
89 
90  Decoder<float>& input = *inputData;
91  Encoder<float>& output = *outData;
92 
93  switch(numInputDimensions) {
94 
95  case 1:
96  inputWidth = inputShape[0];
97  for (unsigned int w = 0; w < inputWidth ; w++)
98  {
99  input[w];
100  auto inputValue = input.Get();
101  auto outputIndex = w + padList[0].first;
102  output[outputIndex];
103  output.Set(inputValue);
104  }
105 
106  break;
107  case 2:
108  inputHeight = inputShape[0];
109  inputWidth = inputShape[1];
110  outputWidth = outputShape[1];
111 
112  for (unsigned int h = 0; h < inputHeight; h++)
113  {
114  for (unsigned int w = 0; w < inputWidth ; w++)
115  {
116  input[h * inputWidth + w];
117  auto inputValue = input.Get();
118  auto outputIndex = (h + padList[0].first) * outputWidth + (w + padList[1].first);
119  output[outputIndex];
120  output.Set(inputValue);
121  }
122  }
123 
124  break;
125  case 3:
126  inputChannels = inputShape[0];
127  inputHeight = inputShape[1];
128  inputWidth = inputShape[2];
129  outputHeight = outputShape[1];
130  outputWidth = outputShape[2];
131 
132  for (unsigned int c = 0; c < inputChannels; c++)
133  {
134  for (unsigned int h = 0; h < inputHeight; h++)
135  {
136  for (unsigned int w = 0; w < inputWidth ; w++)
137  {
138  input[c * inputHeight * inputWidth + h * inputWidth + w];
139  auto inputValue = input.Get();
140  auto outputIndex = (c + padList[0].first) * outputHeight * outputWidth
141  + (h + padList[1].first) * outputWidth
142  + (w + padList[2].first);
143  output[outputIndex];
144  output.Set(inputValue);
145  }
146  }
147  }
148 
149  break;
150  case 4:
151  inputBatches = inputShape[0];
152  inputChannels = inputShape[1];
153  inputHeight = inputShape[2];
154  inputWidth = inputShape[3];
155  outputChannels = outputShape[1];
156  outputHeight = outputShape[2];
157  outputWidth = outputShape[3];
158 
159  for (unsigned int b = 0; b < inputBatches; b++)
160  {
161  for (unsigned int c = 0; c < inputChannels; c++)
162  {
163  for (unsigned int h = 0; h < inputHeight; h++)
164  {
165  for (unsigned int w = 0; w < inputWidth ; w++)
166  {
167  input[b * inputChannels * inputHeight * inputWidth
168  + c * inputHeight * inputWidth
169  + h * inputWidth
170  + w];
171  auto inputValue = input.Get();
172  auto outputIndex = (b + padList[0].first)
173  * outputChannels * outputHeight * outputWidth
174  + (c + padList[1].first) * outputHeight * outputWidth
175  + (h + padList[2].first) * outputWidth
176  + (w + padList[3].first);
177  output[outputIndex];
178  output.Set(inputValue);
179  }
180  }
181  }
182  }
183  break;
184 
185  case 5:
186  inputBatches = inputShape[0];
187  inputChannels = inputShape[1];
188  inputHeight = inputShape[2];
189  inputWidth = inputShape[3];
190  inputDim5 = inputShape[4];
191 
192  outputBatches = outputShape[1];
193  outputChannels = outputShape[2];
194  outputHeight = outputShape[3];
195  outputWidth = outputShape[4];
196 
197  for (unsigned int b = 0; b < inputBatches; ++b)
198  {
199  for (unsigned int c = 0; c < inputChannels; ++c)
200  {
201  for (unsigned int h = 0; h < inputHeight; ++h)
202  {
203  for (unsigned int w = 0; w < inputWidth ; ++w)
204  {
205  for (unsigned int d = 0; d < inputDim5 ; ++d)
206  {
207  input[b * inputChannels * inputHeight * inputWidth * inputDim5
208  + c * inputHeight * inputWidth * inputDim5
209  + h * inputWidth * inputDim5
210  + d];
211 
212  auto inputValue = input.Get();
213 
214  auto outputIndex = (b + padList[0].first)
215  * outputBatches * outputChannels * outputHeight * outputWidth
216  + (c + padList[1].first) * outputChannels * outputHeight*outputWidth
217  + (h + padList[2].first) * outputHeight * outputWidth
218  + (w + padList[3].first) * outputWidth
219  + (d + padList[4].first);
220 
221  output[outputIndex];
222  output.Set(inputValue);
223  }
224  }
225  }
226  }
227  }
228  break;
229 
230  default:
231  break;
232  }
233 }

References Decoder< IType >::Get(), TensorInfo::GetDataType(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), TensorInfo::IsQuantized(), PadDescriptor::m_PadList, PadDescriptor::m_PadValue, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), and Encoder< IType >::Set().

◆ ParseBooleanBackendOption()

bool armnn::ParseBooleanBackendOption ( const armnn::BackendOptions::Var &  value,
bool  defaultValue 
)
inline

Definition at line 312 of file BackendOptions.hpp.

313 {
314  if (value.IsBool())
315  {
316  return value.AsBool();
317  }
318  return defaultValue;
319 }

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

◆ ParseComputeDevice()

constexpr armnn::Compute armnn::ParseComputeDevice ( const char *  str)
constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 213 of file TypesUtils.hpp.

214 {
215  if (armnn::StrEqual(str, "CpuAcc"))
216  {
217  return armnn::Compute::CpuAcc;
218  }
219  else if (armnn::StrEqual(str, "CpuRef"))
220  {
221  return armnn::Compute::CpuRef;
222  }
223  else if (armnn::StrEqual(str, "GpuAcc"))
224  {
225  return armnn::Compute::GpuAcc;
226  }
227  else
228  {
 229  return armnn::Compute::Undefined;
 230  }
231 }

References CpuAcc, CpuRef, GpuAcc, StrEqual(), and Undefined.

◆ ParseIntBackendOption()

int armnn::ParseIntBackendOption ( const armnn::BackendOptions::Var &  value,
int  defaultValue 
)
inline

Definition at line 330 of file BackendOptions.hpp.

331 {
332  if (value.IsInt())
333  {
334  return value.AsInt();
335  }
336  return defaultValue;
337 }

References BackendOptions::Var::AsInt(), and BackendOptions::Var::IsInt().

Referenced by ClBackendModelContext::ClBackendModelContext().

◆ ParseOptions()

void armnn::ParseOptions ( const std::vector< BackendOptions > &  options,
BackendId  backend,
f 
)

Definition at line 297 of file BackendOptions.hpp.

298 {
299  for (auto optionsGroup : options)
300  {
301  if (optionsGroup.GetBackendId() == backend)
302  {
303  for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
304  {
305  const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
306  f(option.GetName(), option.GetValue());
307  }
308  }
309  }
310 }

References BackendOptions::BackendOption::GetName(), and BackendOptions::BackendOption::GetValue().

Referenced by ClBackendContext::ClBackendContext(), ClBackendModelContext::ClBackendModelContext(), GpuFsaBackendContext::GpuFsaBackendContext(), NeonBackendModelContext::NeonBackendModelContext(), and RuntimeImpl::RuntimeImpl().
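A sketch of the intended usage: the caller supplies a callable that receives each option's name and value for the requested backend, typically together with the Parse*BackendOption helpers documented nearby (FastMathEnabled is one of the documented GpuAcc options):

    armnn::BackendOptions gpuAccOptions("GpuAcc", { {"FastMathEnabled", true} });
    std::vector<armnn::BackendOptions> modelOptions = { gpuAccOptions };

    bool fastMathEnabled = false;
    armnn::ParseOptions(modelOptions, "GpuAcc",
        [&](const std::string& name, const armnn::BackendOptions::Var& value)
        {
            if (name == "FastMathEnabled")
            {
                fastMathEnabled = armnn::ParseBooleanBackendOption(value, false);
            }
        });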

◆ ParseStringBackendOption()

std::string armnn::ParseStringBackendOption ( const armnn::BackendOptions::Var &  value,
std::string  defaultValue 
)
inline

Definition at line 321 of file BackendOptions.hpp.

322 {
323  if (value.IsString())
324  {
325  return value.AsString();
326  }
327  return defaultValue;
328 }

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ ParseTuningLevel()

TuningLevel armnn::ParseTuningLevel ( const BackendOptions::Var &  value,
TuningLevel  defaultValue 
)
inline

Definition at line 26 of file ArmComputeTuningUtils.hpp.

27 {
28  if (value.IsInt())
29  {
30  int v = value.AsInt();
31  if (v > static_cast<int>(TuningLevel::Exhaustive) ||
32  v < static_cast<int>(TuningLevel::None))
33  {
34  ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
35  "Using default(" << static_cast<int>(defaultValue) << ")";
36  } else
37  {
38  return static_cast<TuningLevel>(v);
39  }
40  }
41  return defaultValue;
42 }

References ARMNN_LOG, BackendOptions::Var::AsInt(), Exhaustive, BackendOptions::Var::IsInt(), None, and warning.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ PermuteTensor()

armnn::ConstTensor PermuteTensor ( const ConstTensorHandle *  tensor,
const PermutationVector &  permutationVector,
void *  permuteBuffer 
)

Definition at line 19 of file WorkloadUtils.cpp.

21 {
22  if (tensor == nullptr)
23  {
24  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
25  }
26  if (permuteBuffer == nullptr)
27  {
28  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
29  }
30 
31  TensorInfo tensorInfo = tensor->GetTensorInfo();
32 
33  if (permutationVector.GetSize() > 0)
34  {
35  tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
36  armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
37  tensor->GetConstTensor<void>(), permuteBuffer,
38  GetDataTypeSize(tensorInfo.GetDataType()));
39  }
40  else
41  {
42  ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
43  }
44  tensorInfo.SetConstant(true);
45  return ConstTensor(tensorInfo, permuteBuffer);
46 }

References ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), GetDataTypeSize(), TensorInfo::GetNumBytes(), TensorInfo::GetShape(), PermutationVector::GetSize(), ConstTensorHandle::GetTensorInfo(), armnnUtils::Permute(), armnnUtils::Permuted(), and TensorInfo::SetConstant().

Referenced by Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), and ConvertWeightTensorFromArmnnToAcl().

◆ PolymorphicDowncast()

DestType armnn::PolymorphicDowncast ( SourceType *  value)

Polymorphic downcast for build in pointers only.

Usage: Child* pChild = PolymorphicDowncast<Child*>(pBase);

Template Parameters
    DestType    Pointer type to the target object (Child pointer type)
    SourceType  Pointer type to the source object (Base pointer type)

Parameters
    value       Pointer to the source object

Returns
    Pointer of type DestType (Pointer of type child)

Definition at line 74 of file PolymorphicDowncast.hpp.

75 {
76  static_assert(std::is_pointer<DestType>::value,
77  "PolymorphicDowncast only works with pointer types.");
78 
79  ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == value);
80  return static_cast<DestType>(value);
81 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

Referenced by ClLayerSupport::IsLayerSupported(), and IsLayerTypeSupported().
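A sketch with hypothetical Base/Child types (they are not ArmNN classes); the dynamic_cast assertion shown above is only active in builds where ARMNN_POLYMORPHIC_CAST_CHECK is enabled:

    #include <armnn/utility/PolymorphicDowncast.hpp>

    struct Base  { virtual ~Base() = default; };
    struct Child : Base { int m_Value = 42; };

    Base*  base  = new Child();
    Child* child = armnn::PolymorphicDowncast<Child*>(base);
    int value = child->m_Value;   // 42
    delete base;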

◆ PolymorphicPointerDowncast()

auto armnn::PolymorphicPointerDowncast ( const SourceType &  value)

Polymorphic downcast for shared pointers and build in pointers.

Usage: auto pChild = PolymorphicPointerDowncast<Child>(pBase)

Template Parameters
    DestType    Type of the target object (Child type)
    SourceType  Pointer type to the source object (Base (shared) pointer type)

Parameters
    value       Pointer to the source object

Returns
    Pointer of type DestType ((Shared) pointer of type child)

Definition at line 93 of file PolymorphicDowncast.hpp.

94 {
95  ARMNN_POLYMORPHIC_CAST_CHECK(utility::DynamicPointerCast<DestType>(value)
96  == value);
97  return utility::StaticPointerCast<DestType>(value);
98 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

◆ Pooling2d()

void Pooling2d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling2dDescriptor &  params 
)

Computes the Pooling2d operation.

Definition at line 142 of file Pooling2d.cpp.

147 {
148  const DataLayoutIndexed dataLayout(params.m_DataLayout);
149  auto channelsIndex = dataLayout.GetChannelsIndex();
150  auto heightIndex = dataLayout.GetHeightIndex();
151  auto widthIndex = dataLayout.GetWidthIndex();
152 
153  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
154  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
155  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
156  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
157  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
158  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
159  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
160  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
161  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
162  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
163  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
164  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
165  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
166  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
167 
168  float defaultInitializer = DefaultInitializer(params.m_PoolType);
169 
170  Accumulator accumulate = GetAccumulator(params.m_PoolType);
171  Executor execute = GetExecutor(params.m_PoolType);
172 
173  // Check supported padding methods outside the loop to simplify
174  // the inner loop.
175  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
176  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
177  {
178  throw armnn::InvalidArgumentException("Unsupported padding type");
179  }
180 
181  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
182 
183  for (int n = 0; n < batchSize; n++)
184  {
185  for (int c = 0; c < channels; c++)
186  {
187  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
188  {
189  // Calculate values independent of the x axis
190  int hstart = (yOutput * strideY) - padTop;
191  int hend = hstart + poolHeight;
192  // Clamp the pooling region inside the valid input area (which includes the padding).
193  // This is necessary because the final pooling in a row may overlap beyond the padding.
194  hend = std::min(hend, heightInput + padBottom);
195 
196  int height = hend - hstart;
197  bool hclamped = ClampRange(hstart, hend, heightInput);
198 
199  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
200  {
201  int wstart = (xOutput * strideX) - padLeft;
202  int wend = wstart + poolWidth;
203 
204  // Clamp the pooling region inside the valid input area (which includes the padding).
205  // This is necessary because the final pooling in a row may overlap beyond the padding.
206  wend = std::min(wend, widthInput + padRight);
207 
208  float result = defaultInitializer;
209  float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));
210 
211  // Special case: when the pooling kernel is over a padding region and the padding
212  // size is larger or equal to the kernel and the kernel only covers
213  // padding and no real values, then we initialize the result as zero
214  // by convention. This is because we need to choose a value here and
215  // all values we have are padding, which we ignore.
216  if (OnPaddingOnly(hstart, hend, heightInput) ||
217  OnPaddingOnly(wstart, wend, widthInput))
218  {
219  result = 0.0f;
220 
221  int outputIndex;
222 
223  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
224  {
225  outputIndex = n * heightOutput * widthOutput * channels +
226  yOutput * widthOutput * channels +
227  xOutput * channels +
228  c;
229  }
230  else
231  {
232  outputIndex = n * heightOutput * widthOutput * channels +
233  c * heightOutput * widthOutput +
234  yOutput * widthOutput +
235  xOutput;
236  }
237 
238  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
239  rOutputEncoder.Set(result);
240  continue;
241  }
242 
243  bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);
244 
245  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
246  {
247  // When we exclude the padding, it means we calculate with a smaller
248  // kernel size, so I changed the divisor here.
249  poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
250  }
251 
252  for (auto yInput = hstart; yInput < hend; yInput++)
253  {
254  for (auto xInput = wstart; xInput < wend; xInput++)
255  {
256 
257  int inputIndex;
258  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
259  {
260  inputIndex = n * heightInput * widthInput * channels +
261  yInput * widthInput * channels +
262  xInput * channels +
263  c;
264 
265  }
266  else
267  {
268  inputIndex = n * heightInput * widthInput * channels +
269  c * heightInput * widthInput +
270  yInput * widthInput +
271  xInput;
272  }
273 
274  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
275  }
276  }
277 
278  execute(result, poolAreaSize);
279 
280  int outputIndex;
281 
282  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
283  {
284  outputIndex = n * heightOutput * widthOutput * channels +
285  yOutput * widthOutput * channels +
286  xOutput * channels +
287  c;
288  }
289  else
290  {
291  outputIndex = n * heightOutput * widthOutput * channels +
292  c * heightOutput * widthOutput +
293  yOutput * widthOutput +
294  xOutput;
295  }
296 
297  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
298  rOutputEncoder.Set(result);
299  }
300  }
301  }
302  }
303 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling2dDescriptor::m_DataLayout, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, Pooling2d(), and Encoder< IType >::Set().

Referenced by Pooling2d(), and Pooling2dLayer::Pooling2dLayer().
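The window clamping used above can be illustrated with a small standalone sketch (invented numbers, not ArmNN API): a 3-row pool with stride 2 and one row of padding on a 4-row input.

    #include <algorithm>
    #include <iostream>

    int main()
    {
        const int heightInput = 4, poolHeight = 3, strideY = 2, padTop = 1, padBottom = 1;
        for (int yOutput = 0; yOutput < 2; ++yOutput)
        {
            int hstart = (yOutput * strideY) - padTop;                 // may start inside the padding (negative)
            int hend   = std::min(hstart + poolHeight, heightInput + padBottom);

            // Equivalent of ClampRange(): restrict the window to real input rows.
            int realStart = std::max(hstart, 0);
            int realEnd   = std::min(hend, heightInput);

            std::cout << "output row " << yOutput << ": window [" << hstart << ", " << hend
                      << ") -> input rows [" << realStart << ", " << realEnd << ")\n";
        }
        return 0;
    }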

◆ Pooling3d()

void Pooling3d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling3dDescriptor &  params 
)

Computes the Pooling3d operation.

Definition at line 172 of file Pooling3d.cpp.

177 {
178  const DataLayoutIndexed dataLayout(params.m_DataLayout);
179 
180  auto channelsIndex = dataLayout.GetChannelsIndex();
181 
182  auto depthIndex = dataLayout.GetDepthIndex();
183  auto heightIndex = dataLayout.GetHeightIndex();
184  auto widthIndex = dataLayout.GetWidthIndex();
185 
186  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
187  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
188 
189  const int depthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
190  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
191  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
192 
193  const int depthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
194  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
195  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
196 
197  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
198  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
199  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
200  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
201  const int padFront = armnn::numeric_cast<int>(params.m_PadFront);
202  const int padBack = armnn::numeric_cast<int>(params.m_PadBack);
203 
204  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
205  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
206  const int strideZ = armnn::numeric_cast<int>(params.m_StrideZ);
207 
208  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
209  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
210  const int poolDepth = armnn::numeric_cast<int>(params.m_PoolDepth);
211 
212  float defaultInitializer = DefaultInitializer(params.m_PoolType);
213  Accumulator accumulate = GetAccumulator(params.m_PoolType);
214  Executor execute = GetExecutor(params.m_PoolType);
215 
216  // Check supported padding methods outside the loop to simplify
217  // the inner loop.
218  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
219  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
220  {
221  throw armnn::InvalidArgumentException("Unsupported padding type");
222  }
223 
224  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
225 
226  for (int n = 0; n < batchSize; n++)
227  {
228  for (int c = 0; c < channels; c++)
229  {
230  for (int zOutput = 0; zOutput < depthOutput; zOutput++)
231  {
232  // Calculate values independent of the x and y axis
233  int dstart = (zOutput * strideZ) - padFront;
234  int dend = dstart + poolDepth;
235  // Clamp the pooling region inside the valid input area (which includes the padding).
236  // This is necessary because the final pooling in a row may overlap beyond the padding.
237  dend = std::min(dend, depthInput + padBack);
238 
239  int depth = dend - dstart;
240  bool dclamped = ClampRange(dstart, dend, depthInput);
241  int depthClamped = dend - dstart;
242 
243  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
244  {
245  int hstart = (yOutput * strideY) - padTop;
246  int hend = hstart + poolHeight;
247  // Clamp the pooling region inside the valid input area (which includes the padding).
248  // This is necessary because the final pooling in a row may overlap beyond the padding.
249  hend = std::min(hend, heightInput + padBottom);
250 
251  int height = hend - hstart;
252  bool hclamped = ClampRange(hstart, hend, heightInput);
253  int heightClamped = hend - hstart;
254 
255  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
256  {
257  int wstart = (xOutput * strideX) - padLeft;
258  int wend = wstart + poolWidth;
259  // Clamp the pooling region inside the valid input area (which includes the padding).
260  // This is necessary because the final pooling in a row may overlap beyond the padding.
261  wend = std::min(wend, widthInput + padRight);
262 
263  int width = wend - wstart;
264  bool wclamped = ClampRange(wstart, wend, widthInput);
265  int widthClamped = wend - wstart;
266 
267  float result = defaultInitializer;
268  float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
269 
270  // Special case: when the pooling kernel is over a padding region and the padding
271  // size is larger or equal to the kernel and the kernel only covers
272  // padding and no real values, then we initialize the result as zero
273  // by convention. This is because we need to choose a value here and
274  // all values we have are padding, which we ignore.
275  if (OnPaddingOnly(dstart, dend, depthInput) ||
276  OnPaddingOnly(hstart, hend, heightInput) ||
277  OnPaddingOnly(wstart, wend, widthInput))
278  {
279  result = 0.0f;
280 
281  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
282  n, c, zOutput, yOutput, xOutput, dataLayout);
283 
284  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
285  rOutputEncoder.Set(result);
286 
287  continue;
288  }
289 
290  bool clamped = (dclamped | hclamped | wclamped);
291 
292  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
293  {
294  // When we exclude the padding, it means we calculate with a smaller
295  // kernel size, so I changed the divisor here.
296  poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
297  }
298 
299  for (auto zInput = dstart; zInput < dend; zInput++)
300  {
301  for (auto yInput = hstart; yInput < hend; yInput++)
302  {
303  for (auto xInput = wstart; xInput < wend; xInput++)
304  {
305 
306  int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
307  n, c, zInput, yInput, xInput, dataLayout);
308 
309  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
310  }
311  }
312  }
313 
314  execute(result, poolAreaSize);
315 
316  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
317  n, c, zOutput, yOutput, xOutput, dataLayout);
318 
319  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
320  rOutputEncoder.Set(result);
321  }
322  }
323  }
324  }
325  }
326 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling3dDescriptor::m_DataLayout, Pooling3dDescriptor::m_PadBack, Pooling3dDescriptor::m_PadBottom, Pooling3dDescriptor::m_PaddingMethod, Pooling3dDescriptor::m_PadFront, Pooling3dDescriptor::m_PadLeft, Pooling3dDescriptor::m_PadRight, Pooling3dDescriptor::m_PadTop, Pooling3dDescriptor::m_PoolDepth, Pooling3dDescriptor::m_PoolHeight, Pooling3dDescriptor::m_PoolType, Pooling3dDescriptor::m_PoolWidth, Pooling3dDescriptor::m_StrideX, Pooling3dDescriptor::m_StrideY, Pooling3dDescriptor::m_StrideZ, Pooling3d(), and Encoder< IType >::Set().

Referenced by Pooling3d(), and Pooling3dLayer::Pooling3dLayer().

◆ PreluImpl()

void PreluImpl ( const TensorInfo &  inputInfo,
const TensorInfo &  alphaInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  inputData,
Decoder< float > &  alphaData,
Encoder< float > &  outputData 
)

Definition at line 13 of file PreluImpl.cpp.

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const TensorShape& alphaShape = alphaInfo.GetShape();
22  const TensorShape& outputShape = outputInfo.GetShape();
23 
24  // PReLU activation: f(x) = alpha * x for x < 0, f(x) = x for x >= 0
25  auto prelu = [](float x, float alpha)
26  {
27  return x < 0 ? alpha * x : x;
28  };
29 
30  BroadcastLoop(inputShape, alphaShape, outputShape).Unroll(prelu, 0, inputData, alphaData, outputData);
31 }

References TensorInfo::GetShape(), and BroadcastLoop::Unroll().
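The element-wise function applied by the lambda above, with a couple of example values:

    auto prelu = [](float x, float alpha) { return x < 0 ? alpha * x : x; };

    float negative = prelu(-2.0f, 0.25f);   // -0.5f : negative inputs are scaled by alpha
    float positive = prelu( 3.0f, 0.25f);   //  3.0f : non-negative inputs pass through unchanged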

◆ PrintOutput()

void armnn::PrintOutput ( const TensorInfo &  inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
std::ostream &  os 
)

Definition at line 23 of file Debug.cpp.

29 {
30  const unsigned int numDims = inputInfo.GetNumDimensions();
31  const unsigned int numElements = inputInfo.GetNumElements();
32  const TensorShape& inputShape = inputInfo.GetShape();
33 
34  std::vector<unsigned int> strides(numDims, 0);
35  strides[numDims - 1] = inputShape[numDims - 1];
36 
37  for (unsigned int i = 2; i <= numDims; i++)
38  {
39  strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
40  }
41 
42  os << "{ ";
43  os << "\"layerGuid\": " << guid << ", ";
44  os << "\"layerName\": \"" << layerName << "\", ";
45  os << "\"outputSlot\": " << slotIndex << ", ";
46  os << "\"shape\": ";
47 
48  os << "[";
49  for (unsigned int i = 0; i < numDims; i++)
50  {
51  os << inputShape[i];
52  if (i != numDims - 1)
53  {
54  os << ", ";
55  }
56  }
57  os << "], ";
58 
59  os << "\"min\": "
60  << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
61 
62  os << "\"max\": "
63  << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
64 
65  os << "\"data\": ";
66 
67  for (unsigned int i = 0; i < numElements; i++)
68  {
69  for (unsigned int j = 0; j < numDims; j++)
70  {
71  if (i % strides[j] == 0)
72  {
73  os << "[";
74  }
75  }
76 
77  os << static_cast<float>(inputData[i]);
78 
79  for (unsigned int j = 0; j < numDims; j++)
80  {
81  if ((i + 1) % strides[j] == 0)
82  {
83  os << "]";
84  }
85  }
86 
87  if (i != numElements - 1)
88  {
89  os << ", ";
90  }
91  }
92 
93  os << " }" << std::endl;
94 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), and TensorInfo::GetShape().
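As an illustration (values invented): for a 2x2 Float32 tensor {1, 2, 3, 4} on output slot 0 of a layer named "conv1", the function writes a single JSON-like record of the form:

    // { "layerGuid": 123, "layerName": "conv1", "outputSlot": 0, "shape": [2, 2],
    //   "min": 1, "max": 4, "data": [[1, 2], [3, 4]] }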

◆ ProfilingUpdateDescriptions()

void armnn::ProfilingUpdateDescriptions ( const std::string &  name,
const DescriptorType &  desc,
const WorkloadInfo &  infos,
const arm::pipe::ProfilingGuid  guid 
)
inline


Definition at line 180 of file Profiling.hpp.

184 {
185  IProfiler* profiler(ProfilerManager::GetInstance().GetProfiler()); ///< Profiler used
186  if (profiler && profiler->IsProfilingEnabled())
187  {
188  profiler->AddLayerDetails(name, desc, infos, guid);
189  }
190 }

References ProfilerManager::GetInstance(), and IProfiler::IsProfilingEnabled().

◆ Quantize() [1/2]

template int32_t Quantize< int32_t > ( float  value,
float  scale,
int32_t  offset 
)

Quantize a floating point value into a quantized integer data type (8-bit, 16-bit or 32-bit).

Explicit specialization of Quantize for int32_t.

Explicit specialization of Quantize for int16_t.

Explicit specialization of Quantize for uint8_t.

Explicit specialization of Quantize for int8_t.

Parameters
value - The value to quantize.
scale - The scale (must be non-zero).
offset - The offset.
Returns
- The quantized value calculated as round(value/scale)+offset.

Definition at line 30 of file TypesUtils.cpp.

31 {
32  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
33  constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34  constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
35  if (std::isnan(value))
36  {
37  throw armnn::InvalidArgumentException("Quantize: Value is NaN");
38  }
39 
40  float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
41  static_cast<float>(min)), static_cast<float>(max));
42  auto quantizedBits = static_cast<QuantizedType>(clampedValue);
43 
44  return quantizedBits;
45 }
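
A standalone sketch of the same arithmetic, round(value / scale) + offset clamped to the range of the target type, for a hypothetical uint8 scale/offset pair (plain C++; the armnn::Quantize<uint8_t> specialization listed on this page performs this case inside the library):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main()
    {
        const float scale  = 0.1f;   // must be non-zero
        const int   offset = 128;

        const float values[] = { -12.8f, 0.0f, 0.05f, 12.7f, 100.0f };
        for (float value : values)
        {
            const float lo = static_cast<float>(std::numeric_limits<std::uint8_t>::lowest());
            const float hi = static_cast<float>(std::numeric_limits<std::uint8_t>::max());
            const float clamped = std::min(std::max(static_cast<float>(offset) + std::round(value / scale), lo), hi);
            std::printf("%8.2f -> %u\n", value, static_cast<unsigned>(clamped));
            // e.g. -12.80 -> 0, 0.00 -> 128, 12.70 -> 255, 100.00 -> 255 (clamped)
        }
        return 0;
    }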

◆ Quantize() [2/2]

void armnn::Quantize ( uint8_t *  quant,
const float *  dequant,
const TensorInfo &  info 
)
inline

Definition at line 121 of file RefWorkloadUtils.hpp.

122 {
123  for (size_t i = 0; i < info.GetNumElements(); i++)
124  {
125  quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
126  }
127 }

References info.

◆ Reduce()

void Reduce ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  input,
Encoder< float > &  output,
const std::vector< uint32_t >  axis,
const ReduceOperation  reduceOperation 
)

Definition at line 70 of file Reduce.cpp.

76 {
77  armnn::TensorShape inputDims = inputInfo.GetShape();
78  unsigned int inputNumDims = inputInfo.GetNumDimensions();
79  unsigned int numOutputs = outputInfo.GetNumElements();
80 
81  // Initialise temp output
82  std::vector<float> tempOut(numOutputs);
83  switch(reduceOperation)
84  {
85  case ReduceOperation::Mean:
86  case ReduceOperation::Sum:
87  std::fill(tempOut.begin(), tempOut.end(), 0.0f);
88  break;
89  case ReduceOperation::Prod:
90  std::fill(tempOut.begin(), tempOut.end(), 1.0f);
91  break;
92  case ReduceOperation::Max:
93  std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
94  break;
95  case ReduceOperation::Min:
96  std::fill(tempOut.begin(), tempOut.end(), std::numeric_limits<float>::max());
97  break;
98  default:
99  throw armnn::InvalidArgumentException("Unknown reduce method: " +
100  std::to_string(static_cast<int>(reduceOperation)));
101  }
102 
103  // Initialise temp index
104  std::vector<unsigned int> tempIndex(inputNumDims, 0);
105 
106  std::vector<unsigned int> resolvedAxis = axis;
107  if (resolvedAxis.empty())
108  {
109  for (unsigned int idx = 0; idx < inputNumDims; ++idx)
110  {
111  resolvedAxis.push_back(idx);
112  }
113  }
114  auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
115 
116  // Iterates through input_data and operates over the reduced axis
117  for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
118  {
119  unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
120  unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
121  numResolvedAxis, resolvedAxis);
122  input[inputOffset];
123  auto inputValue = input.Get();
124  switch(reduceOperation)
125  {
126  case ReduceOperation::Mean:
127  case ReduceOperation::Sum:
128  tempOut[outputOffset] += inputValue;
129  break;
130  case ReduceOperation::Prod:
131  tempOut[outputOffset] *= inputValue;
132  break;
133  case ReduceOperation::Max:
134  if (inputValue > tempOut[outputOffset])
135  {
136  tempOut[outputOffset] = inputValue;
137  }
138  break;
139  case ReduceOperation::Min:
140  if (inputValue < tempOut[outputOffset])
141  {
142  tempOut[outputOffset] = inputValue;
143  }
144  break;
145  default:
146  throw armnn::InvalidArgumentException("Unknown reduce method: " +
147  std::to_string(static_cast<int>(reduceOperation)));
148  }
149  }
150 
151  // Takes average by num of elements added to get MEAN
152  size_t numElementsInAxis = 1;
153  for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
154  {
155  unsigned int current = inputDims[resolvedAxis[idx]];
156  numElementsInAxis *= current;
157  }
158 
159  for (unsigned int idx = 0; idx < numOutputs; ++idx)
160  {
161  output[idx];
162  if (reduceOperation == ReduceOperation::Mean)
163  {
164  if (numElementsInAxis > 0)
165  {
166  output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
167  }
168  }
169  else
170  {
171  output.Set(tempOut[idx]);
172  }
173  }
174 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), Max, Mean, Min, NextIndex(), Prod, ReducedOutputOffset(), Encoder< IType >::Set(), and Sum.
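
For orientation, a standalone sketch of a Mean reduction over axis 1 of a hypothetical [2, 3] tensor, mirroring the accumulate-then-divide flow above (plain C++, no ArmNN types):

    #include <cstdio>

    int main()
    {
        const unsigned int rows = 2, cols = 3;
        const float input[rows][cols] = { { 1.0f, 2.0f, 3.0f },
                                          { 4.0f, 5.0f, 6.0f } };

        for (unsigned int r = 0; r < rows; ++r)
        {
            float acc = 0.0f;                       // Sum and Mean both start from 0
            for (unsigned int c = 0; c < cols; ++c)
            {
                acc += input[r][c];                 // accumulate along the reduced axis
            }
            std::printf("row %u mean = %f\n", r, acc / cols);   // 2.0 and 5.0
        }
        return 0;
    }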

◆ ReducedOutputOffset()

unsigned int armnn::ReducedOutputOffset ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  index,
const unsigned int  numAxis,
const std::vector< unsigned int > &  axis 
)

Definition at line 40 of file Reduce.cpp.

45 {
46  unsigned int offset = 0;
47  for (unsigned int idx = 0; idx < numDims; ++idx)
48  {
49  bool isAxis = false;
50  if (!axis.empty())
51  {
52  for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
53  {
54  if (idx == axis[axisIdx])
55  {
56  isAxis = true;
57  break;
58  }
59  }
60  }
61  if (!isAxis)
62  {
63  offset = offset * dims[idx] + index[idx];
64  }
65  }
66  return offset;
67 }

Referenced by Reduce().

◆ RefBackendId()

constexpr const char* armnn::RefBackendId ( )
constexpr

Definition at line 10 of file RefBackendId.hpp.

10 { return "CpuRef"; }

Referenced by RefBackend::GetIdStatic().

◆ RefTensorHandleFactoryId()

constexpr const char* armnn::RefTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file RefTensorHandleFactory.hpp.

15 { return "Arm/Ref/TensorHandleFactory"; }

Referenced by RefTensorHandleFactory::GetIdStatic().

◆ RemoveReshapeLayer()

void armnn::RemoveReshapeLayer ( ReshapeLayer *  baseLayer,
std::map< LayerGuid, Layer * > &  untouched,
OptimizationViews &  optimizationViews 
)
inline

Definition at line 293 of file SubgraphUtils.hpp.

296 {
297  if (baseLayer == nullptr)
298  {
299  return;
300  }
301  ReshapeDescriptor reshapeDescriptor = baseLayer->GetParameters();
302  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
303 
304  // Cannot currently remove the Reshape if it's connected to an Input, Constant or Splitter
305  if (parentLayer.GetType() == LayerType::Input || parentLayer.GetType() == LayerType::Constant)
306  {
307  return;
308  }
309 
310  // Cannot currently remove the Reshape if it's connected to an OutputSlot or Concat
311  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
312  {
313  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
314 
315  if (nextLayer.GetType() == LayerType::Output)
316  {
317  return;
318  }
319  }
320  auto it = untouched.find(baseLayer->GetGuid());
321  if (it == untouched.end())
322  {
323  // Already removed from map
324  return;
325  }
326  untouched.erase(it);
327 
328  // Override the InputSlot TensorInfos for all the layers connected to the Reshape's OutputSlot
329  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
330  {
331  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
332  auto inputIndex = baseLayer->GetOutputSlot(0).GetConnection(i)->GetSlotIndex();
333  TensorInfo reshapeInfo(baseLayer->GetOutputSlot(0).GetTensorInfo());
334  reshapeInfo.SetShape(reshapeDescriptor.m_TargetShape);
335  nextLayer.GetInputSlot(inputIndex).SetTensorInfo(reshapeInfo);
336  }
337  optimizationViews.AddDeletedSubgraph(baseLayer);
338 }

References OptimizationViews::AddDeletedSubgraph(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetGuid(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), LayerWithParameters< Parameters >::GetParameters(), InputSlot::GetSlotIndex(), OutputSlot::GetTensorInfo(), Layer::GetType(), Input, ReshapeDescriptor::m_TargetShape, Output, TensorInfo::SetShape(), and InputSlot::SetTensorInfo().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ ReorderWeightChannelsForAcl()

ConstTensor armnn::ReorderWeightChannelsForAcl ( const ConstTensor &  weightHandle,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 74 of file WorkloadUtils.cpp.

75 {
76  DataType* weight = static_cast<DataType*>(permuteBuffer);
77  const TensorShape& weightShape = weightHandle.GetShape();
78  unsigned int multiplier;
79  unsigned int height;
80  unsigned int width;
81  unsigned int inputChannels;
82  switch (dataLayout)
83  {
84  case DataLayout::NHWC: //It actually is [ H, W, I, M ]
85  height = weightShape[0];
86  width = weightShape[1];
87  inputChannels = weightShape[2];
88  multiplier = weightShape[3];
89  break;
90  case DataLayout::NCHW: //It actually is [ M, I, H, W ]
91  default:
92  height = weightShape[2];
93  width = weightShape[3];
94  inputChannels = weightShape[1];
95  multiplier = weightShape[0];
96  break;
97  }
98 
99  std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
100  unsigned int destinationWeightsChannel;
101  unsigned int totalChannels = inputChannels * multiplier;
102  unsigned int channelSize = height * width;
103  unsigned int inputChannel = 0;
104 
105  for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
106  {
107  inputChannel = originWeightsChannel % inputChannels;
108  destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;
109 
110  for (unsigned int i = 0; i < channelSize; i++)
111  {
112  weightAclOrder[i + destinationWeightsChannel * channelSize] =
113  weight[i + originWeightsChannel * channelSize];
114  }
115  }
116 
117  ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
118  return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
119 }

References BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetNumBytes(), BaseTensor< MemoryType >::GetShape(), NCHW, and NHWC.
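
The remapping above sends origin channel o (with I input channels and multiplier M) to destination (o - o % I) / I + M * (o % I). A small sketch printing that mapping for hypothetical I = 2 and M = 3 (plain C++):

    #include <cstdio>

    int main()
    {
        const unsigned int inputChannels = 2;   // I
        const unsigned int multiplier    = 3;   // M
        const unsigned int totalChannels = inputChannels * multiplier;

        for (unsigned int origin = 0; origin < totalChannels; ++origin)
        {
            const unsigned int inputChannel = origin % inputChannels;
            const unsigned int destination  = (origin - inputChannel) / inputChannels + multiplier * inputChannel;
            std::printf("origin %u -> destination %u\n", origin, destination);
        }
        return 0;   // prints 0->0, 1->3, 2->1, 3->4, 4->2, 5->5
    }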

◆ ReplaceLayers()

void armnn::ReplaceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
std::vector< IConnectableLayer * > &  layers 
)

Definition at line 339 of file ArmComputeSubgraphUtils.hpp.

342 {
343  std::list<IConnectableLayer*> replacementLayers(layers.begin(), layers.end());
344 
345  SubgraphView substitutionSubgraph(baseLayer);
346  SubgraphView replacementSubgraph(std::move(replacementLayers),
347  CreateIInputsFrom({replacementLayers.front()}),
348  CreateIOutputsFrom({replacementLayers.back()}));
349 
350  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
351 }

References OptimizationViews::AddSubstitution().

◆ ReplaceMultipleLayers()

void armnn::ReplaceMultipleLayers ( OptimizationViews &  optimizationViews,
std::vector< IConnectableLayer * > &  originalLayers,
LayerType *  baseLayer,
const std::vector< SlotList >  inputLayersSlotLists,
const std::vector< SlotList >  outputLayersSlotLists 
)

Definition at line 357 of file ArmComputeSubgraphUtils.hpp.

362 {
363  std::list<IConnectableLayer*> originalLayerList(originalLayers.begin(), originalLayers.end());
364 
365  SubgraphView substitutionSubgraph(
366  std::move(originalLayerList),
367  CreateIInputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, inputLayersSlotLists),
368  CreateIOutputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, outputLayersSlotLists));
369  SubgraphView replacementSubgraph(baseLayer);
370 
371  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
372 }

References OptimizationViews::AddSubstitution().

◆ ReportError()

void armnn::ReportError ( const std::string &  errorMessage,
Optional< std::vector< std::string > & >  errorMessages 
)

Definition at line 762 of file Network.cpp.

764 {
765  std::stringstream fullErrorMessage;
766  fullErrorMessage << "ERROR: " << errorMessage;
767  ARMNN_LOG(warning) << fullErrorMessage.str();
768  if (errorMessages)
769  {
770  errorMessages.value().push_back(fullErrorMessage.str());
771  }
772 }

References ARMNN_LOG, and warning.

Referenced by AssignBackends(), CheckScaleSetOnQuantizedType(), Optimize(), and ReturnWithError().

◆ ReportUntouchedLayers()

void armnn::ReportUntouchedLayers ( OptimizationViews &  optimizationViews,
std::map< LayerGuid, Layer * >  untouched 
)
inline

Definition at line 220 of file SubgraphUtils.hpp.

221 {
222  std::vector<Layer*> untouchedVector;
223  for (const auto& pair : untouched)
224  {
225  Layer* layer = pair.second;
226  SubgraphView subgraphView({layer},
227  CreateIInputsFrom({layer}),
228  CreateIOutputsFrom({layer}));
229  optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
230  }
231 }

References OptimizationViews::AddUntouchedSubgraph().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), ClBackend::OptimizeSubgraphView(), and GpuFsaBackend::OptimizeSubgraphView().

◆ ReportWarning()

void armnn::ReportWarning ( const std::string &  warningMessage,
Optional< std::vector< std::string > & >  warningMessages 
)

Definition at line 774 of file Network.cpp.

776 {
777  std::stringstream fullWarningMessage;
778  fullWarningMessage << "WARNING: " << warningMessage;
779  ARMNN_LOG(warning) << fullWarningMessage.str();
780  if (warningMessages)
781  {
782  warningMessages.value().push_back(fullWarningMessage.str());
783  }
784 }

References ARMNN_LOG, and warning.

Referenced by ApplyBackendOptimizations(), and AttemptBackendAssignment().

◆ RequiresCopy()

bool armnn::RequiresCopy ( ITensorHandleFactory::FactoryId  src,
ITensorHandleFactory::FactoryId  dst,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1454 of file Network.cpp.

1457 {
1458  if (src != dst)
1459  {
1460  ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1461  ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1462 
1463  if (srcFactory && dstFactory &&
1464  (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1465  {
1466  return false;
1467  }
1468  return true;
1469  }
1470  return false;
1471 }

References ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), and ITensorHandleFactory::GetImportFlags().

Referenced by CalculateSlotOption().

◆ ReshapeWeightsForAcl()

void ReshapeWeightsForAcl ( TensorInfo &  weightInfo,
DataLayout  dataLayout 
)

Definition at line 48 of file WorkloadUtils.cpp.

49 {
50  // Reshape the weights in-place
51  const TensorShape& weightShape = weightInfo.GetShape();
52  switch (dataLayout)
53  {
54  case DataLayout::NHWC:
55  // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
56  weightInfo.SetShape({ 1,
57  weightShape[0],
58  weightShape[1],
59  weightShape[2] * weightShape[3] });
60  weightInfo.SetShape({ 1,
61  weightShape[0] * weightShape[1],
62  weightShape[2],
63  weightShape[3] });
64  break;
65  case DataLayout::NCHW:
66  default:
67  // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W ]
68  weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
69  break;
70  }
71 }

References TensorInfo::GetShape(), NCHW, NHWC, and TensorInfo::SetShape().

Referenced by ConvertWeightTensorFromArmnnToAcl(), and ConvertWeightTensorInfoFromArmnnToAcl().
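
A worked sketch of the NCHW branch above for hypothetical depthwise weights of shape [ M, I, H, W ] = [ 2, 3, 5, 5 ], which reshape in-place to [ 1, M * I, H, W ] = [ 1, 6, 5, 5 ] (plain C++):

    #include <cstdio>

    int main()
    {
        const unsigned int weightShape[4] = { 2, 3, 5, 5 };   // [ M, I, H, W ]

        const unsigned int reshaped[4] = { 1,
                                           weightShape[0] * weightShape[1],   // M * I
                                           weightShape[2],
                                           weightShape[3] };

        std::printf("[ %u, %u, %u, %u ]\n", reshaped[0], reshaped[1], reshaped[2], reshaped[3]);
        return 0;   // prints [ 1, 6, 5, 5 ]
    }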

◆ Resize()

void Resize ( Decoder< float > &  in,
const TensorInfo &  inputInfo,
Encoder< float > &  out,
const TensorInfo &  outputInfo,
DataLayoutIndexed  dataLayout,
ResizeMethod  resizeMethod,
bool  alignCorners,
bool  halfPixelCenters 
)

Definition at line 65 of file Resize.cpp.

73 {
74  // alignCorners and halfPixelCenters cannot both be true
75  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(!(alignCorners && halfPixelCenters),
76  "Resize: alignCorners and halfPixelCenters cannot both be true");
77 
78  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
79  // image is projected into the input image to figure out the interpolants and weights. Note that this
80  // will yield different results than if projecting the centre of output texels.
81 
82  const unsigned int batchSize = inputInfo.GetShape()[0];
83  const unsigned int channelCount = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
84 
85  const unsigned int inputHeight = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
86  const unsigned int inputWidth = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
87  const unsigned int outputHeight = outputInfo.GetShape()[dataLayout.GetHeightIndex()];
88  const unsigned int outputWidth = outputInfo.GetShape()[dataLayout.GetWidthIndex()];
89 
90  // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
91  // in the input image.
92  const float scaleY = CalculateResizeScale(inputHeight, outputHeight, alignCorners);
93  const float scaleX = CalculateResizeScale(inputWidth, outputWidth, alignCorners);
94 
95  const TensorShape& inputShape = inputInfo.GetShape();
96  const TensorShape& outputShape = outputInfo.GetShape();
97 
98  for (unsigned int n = 0; n < batchSize; ++n)
99  {
100  for (unsigned int c = 0; c < channelCount; ++c)
101  {
102  for (unsigned int y = 0; y < outputHeight; ++y)
103  {
104  // Corresponding real-valued height coordinate in input image.
105  float iy = PixelScaler(y, scaleY, halfPixelCenters, resizeMethod);
106 
107  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
108  const float fiy = (resizeMethod == ResizeMethod::NearestNeighbor && alignCorners) ? armnn::roundf(iy)
109  : floorf(iy);
110  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
111  const unsigned int y0 = static_cast<unsigned int>(std::max(fiy, 0.0f));
112 
113  // Interpolation weight (range [0,1]).
114  const float yw = iy - fiy;
115 
116  for (unsigned int x = 0; x < outputWidth; ++x)
117  {
118  // Real-valued and discrete width coordinates in input image.
119  float ix = PixelScaler(x, scaleX, halfPixelCenters, resizeMethod);
120 
121  // Nearest Neighbour uses rounding to align to corners
122  const float fix = resizeMethod == ResizeMethod::NearestNeighbor && alignCorners ? armnn::roundf(ix)
123  : floorf(ix);
124  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
125  const unsigned int x0 = static_cast<unsigned int>(std::max(fix, 0.0f));
126 
127  // Interpolation weight (range [0,1]).
128  const float xw = ix - fix;
129 
130  unsigned int x1;
131  unsigned int y1;
132  // Half Pixel Centers uses the scaling to compute a weighted parameter for nearby pixels
133  if (halfPixelCenters)
134  {
135  x1 = std::min(static_cast<unsigned int>(std::ceil(ix)), inputWidth - 1u);
136  y1 = std::min(static_cast<unsigned int>(std::ceil(iy)), inputHeight - 1u);
137  }
138  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
139  else
140  {
141  x1 = std::min(x0 + 1, inputWidth - 1u);
142  y1 = std::min(y0 + 1, inputHeight - 1u);
143  }
144 
145  float interpolatedValue;
146  switch (resizeMethod)
147  {
148  case ResizeMethod::Bilinear:
149  {
150  in[dataLayout.GetIndex(inputShape, n, c, y0, x0)];
151  float input1 = in.Get();
152  in[dataLayout.GetIndex(inputShape, n, c, y0, x1)];
153  float input2 = in.Get();
154  in[dataLayout.GetIndex(inputShape, n, c, y1, x0)];
155  float input3 = in.Get();
156  in[dataLayout.GetIndex(inputShape, n, c, y1, x1)];
157  float input4 = in.Get();
158 
159  const float ly0 = Lerp(input1, input2, xw); // lerp along row y0.
160  const float ly1 = Lerp(input3, input4, xw); // lerp along row y1.
161  interpolatedValue = Lerp(ly0, ly1, yw);
162  break;
163  }
164  case ResizeMethod::NearestNeighbor:
165  {
166  // calculate euclidean distance to the 4 neighbours
167  auto distance00 = EuclideanDistance(fix, fiy, x0, y0);
168  auto distance01 = EuclideanDistance(fix, fiy, x0, y1);
169  auto distance10 = EuclideanDistance(fix, fiy, x1, y0);
170  auto distance11 = EuclideanDistance(fix, fiy, x1, y1);
171 
172  auto minimum = std::min( { distance00, distance01, distance10, distance11 } );
173 
174  unsigned int xNearest = 0;
175  unsigned int yNearest = 0;
176 
177  if (minimum == distance00)
178  {
179  xNearest = x0;
180  yNearest = y0;
181  }
182  else if (minimum == distance01)
183  {
184  xNearest = x0;
185  yNearest = y1;
186  }
187  else if (minimum == distance10)
188  {
189  xNearest = x1;
190  yNearest = y0;
191  }
192  else if (minimum == distance11)
193  {
194  xNearest = x1;
195  yNearest = y1;
196  }
197  else
198  {
199  throw InvalidArgumentException("Resize Nearest Neighbor failure");
200  }
201 
202  in[dataLayout.GetIndex(inputShape, n, c, yNearest, xNearest)];
203  interpolatedValue = in.Get();
204  break;
205  }
206  default:
207  throw InvalidArgumentException("Unknown resize method: " +
208  std::to_string(static_cast<int>(resizeMethod)));
209  }
210  out[dataLayout.GetIndex(outputShape, n, c, y, x)];
211  out.Set(interpolatedValue);
212  }
213  }
214  }
215  }
216 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Resize(), roundf(), and Encoder< IType >::Set().

Referenced by Resize(), and ResizeLayer::ResizeLayer().
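
A standalone sketch of the two-stage bilinear interpolation used above for a single 2x2 patch, assuming Lerp(a, b, w) = a + w * (b - a) and hypothetical sample values (plain C++):

    #include <cstdio>

    static float Lerp(float a, float b, float w) { return a + w * (b - a); }

    int main()
    {
        // Values at (y0,x0), (y0,x1), (y1,x0), (y1,x1).
        const float input1 = 1.0f, input2 = 2.0f, input3 = 3.0f, input4 = 4.0f;
        const float xw = 0.25f;   // horizontal interpolation weight
        const float yw = 0.50f;   // vertical interpolation weight

        const float ly0 = Lerp(input1, input2, xw);   // lerp along row y0 -> 1.25
        const float ly1 = Lerp(input3, input4, xw);   // lerp along row y1 -> 3.25
        const float out = Lerp(ly0, ly1, yw);         // lerp between rows  -> 2.25

        std::printf("%f\n", out);
        return 0;
    }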

◆ ReturnWithError()

OptimizationResult armnn::ReturnWithError ( OptimizationResult  res,
const Layer *  layer,
const BackendSettings &  backendSettings,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 786 of file Network.cpp.

790 {
791  std::stringstream failureMsg;
792  failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
793  << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
794  ReportError(failureMsg.str(), errMessages);
795 
796  res.m_Error = true;
797  return res;
798 }

References GetLayerTypeAsCString(), Layer::GetType(), OptimizationResult::m_Error, BackendSettings::m_PreferredBackends, and ReportError().

Referenced by AssignBackendsIConnectable(), and AttemptBackendAssignment().

◆ ReverseGetFlatIdx()

unsigned int armnn::ReverseGetFlatIdx ( const std::vector< unsigned int > &  idxList,
unsigned int  inputRank,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 34 of file ReverseV2Impl.cpp.

37 {
38  unsigned int idx = 0;
39 
40  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
41  {
42  idx += idxList[iDim] * elementNumInner[iDim];
43  }
44 
45  return idx;
46 }

Referenced by ReverseRelocateIdx().

◆ ReverseGetMultIdx()

std::vector<unsigned int> armnn::ReverseGetMultIdx ( const unsigned int  idx,
unsigned int  inputRank,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 16 of file ReverseV2Impl.cpp.

19 {
20  std::vector<unsigned int> indexList(inputRank);
21 
22  unsigned int mIdx = idx;
23 
24  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
25  {
26  indexList[iDim] = static_cast<unsigned int>(mIdx / elementNumInner[iDim]);
27  mIdx %= elementNumInner[iDim];
28  }
29 
30  return indexList;
31 }

Referenced by ReverseRelocateIdx().

◆ ReverseRelocateIdx()

unsigned int armnn::ReverseRelocateIdx ( unsigned int  idx,
unsigned int  inputRank,
std::vector< bool > &  axisFlag,
std::vector< unsigned int > &  dimSize,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 49 of file ReverseV2Impl.cpp.

54 {
55  // Get the multidimensional index list for input
56  auto inputIdxList = ReverseGetMultIdx(idx, inputRank, elementNumInner);
57 
58  std::vector<unsigned int> outputIdxList(inputRank);
59 
60  // Relocate the input index to the output one
61  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
62  {
63  if (axisFlag[iDim])
64  {
65  outputIdxList[iDim] = dimSize[iDim] - inputIdxList[iDim] - 1;
66  }
67  else
68  {
69  outputIdxList[iDim] = inputIdxList[iDim];
70  }
71  }
72 
73  // Get the 1-dimensional flattened index for output
74  unsigned int outputIdx = ReverseGetFlatIdx(outputIdxList, inputRank, elementNumInner);
75  return outputIdx;
76 }

References ReverseGetFlatIdx(), and ReverseGetMultIdx().

Referenced by ReverseV2().

◆ ReverseV2()

void ReverseV2 ( const TensorInfo &  inputInfo,
const TensorInfo &  axisInfo,
Decoder< float > &  inputDecoder,
Decoder< int > &  axisDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 78 of file ReverseV2Impl.cpp.

83 {
84  unsigned int axesRank = static_cast<unsigned int>(axisInfo.GetNumElements());
85 
86  // Empty axis and empty tensor case: copy input to output
87  if ((axesRank == 0) || inputInfo.GetNumElements() == 0)
88  {
89  for (unsigned idx = 0; idx < inputInfo.GetNumElements(); idx++)
90  {
91  float inputValue = inputDecoder.Get();
92  inputDecoder += 1;
93  outputEncoder.Set(inputValue);
94  outputEncoder += 1;
95  }
96  return;
97  }
98 
99  unsigned int inputRank = static_cast<unsigned int>(inputInfo.GetNumDimensions());
100 
101  std::vector<bool> axisFlag(inputRank, false);
102  std::vector<unsigned int> dimSize(inputRank, 0);
103  std::vector<int32_t> axis(axesRank, 0);
104 
105  // Decode the axis information
106  for (unsigned int i=0; i < axesRank; i++)
107  {
108  axis[i] = axisDecoder.Get();
109  axisDecoder += 1;
110  }
111 
112  // Make sure the axes are positive
113  for (int32_t axisElement: axis)
114  {
115  axisElement = axisElement < 0 ? axisElement + static_cast<int32_t>(inputRank) : axisElement;
116  axisFlag[static_cast<uint32_t>(axisElement)] = true;
117  }
118 
119  const TensorShape &inputShape = inputInfo.GetShape();
120 
121  unsigned int elementNum = inputInfo.GetNumElements();
122  unsigned int baseDimSize = 1;
123 
124  std::vector<unsigned int> elementNumInner;
125 
126  // Get the number of element within the specific dimension
127  for (unsigned int iDim = 0; iDim < inputRank; ++iDim) {
128  dimSize[iDim] = inputShape[iDim];
129  baseDimSize *= dimSize[iDim];
130  elementNumInner.push_back(static_cast<unsigned int>(elementNum / baseDimSize));
131  }
132 
133  // Iterate through all elements
134  for (unsigned int idx = 0; idx < elementNum; ++idx)
135  {
136  float inputValue = inputDecoder.Get();
137  inputDecoder += 1;
138  auto outputIdx = ReverseRelocateIdx(idx, inputRank, axisFlag, dimSize, elementNumInner);
139  outputEncoder[outputIdx];
140  outputEncoder.Set(inputValue);
141  }
142 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), ReverseRelocateIdx(), and Encoder< IType >::Set().
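
For orientation, a standalone sketch of reversing axis 1 of a hypothetical [2, 3] tensor: the element at (r, c) is relocated to (r, 2 - c), which is the same index relocation performed above (plain C++, no ArmNN types):

    #include <cstdio>

    int main()
    {
        const unsigned int rows = 2, cols = 3;
        const float input[2][3] = { { 1.0f, 2.0f, 3.0f },
                                    { 4.0f, 5.0f, 6.0f } };
        float output[2][3];

        for (unsigned int r = 0; r < rows; ++r)
        {
            for (unsigned int c = 0; c < cols; ++c)
            {
                output[r][cols - c - 1] = input[r][c];   // reversed along axis 1
            }
        }

        std::printf("%g %g %g\n%g %g %g\n",
                    output[0][0], output[0][1], output[0][2],
                    output[1][0], output[1][1], output[1][2]);
        return 0;   // prints: 3 2 1 / 6 5 4
    }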

◆ RevertConstantWeightsToFP32()

bool armnn::RevertConstantWeightsToFP32 ( Layer *  layer)

◆ roundf()

float armnn::roundf ( float  value)
inline

Definition at line 43 of file Utils.hpp.

44 {
45  // Workaround Valgrind's mismatches: when running from Valgrind the call to std::round(4.5) == 4.0 instead of 5.0
46  return (value < 0.f) ? ::floorf(value - 0.5f) : ::floorf(value + 0.5f);
47 }

Referenced by Resize().

◆ RunClFunction()

void RunClFunction ( arm_compute::IFunction &  function,
const CheckLocation &  location 
)
inline

Definition at line 167 of file ClWorkloadUtils.hpp.

168 {
169  try
170  {
171  function.run();
172  }
173  catch (cl::Error& error)
174  {
175  throw WrapClError(error, location);
176  }
177 }

References error, and WrapClError().

Referenced by ClFillWorkload::Execute(), ClPadWorkload::Execute(), ClAdditionWorkload::Execute(), ClSubtractionWorkload::Execute(), ClActivationWorkload::Execute(), ClNegWorkload::Execute(), ClCastWorkload::Execute(), ClExpWorkload::Execute(), ClPreluWorkload::Execute(), ClConvertFp16ToFp32Workload::Execute(), ClQuantizeWorkload::Execute(), ClRsqrtWorkload::Execute(), ClSinWorkload::Execute(), ClAbsWorkload::Execute(), ClSqrtWorkload::Execute(), ClConvertFp32ToFp16Workload::Execute(), ClLogWorkload::Execute(), ClLstmFloatWorkload::Execute(), ClNormalizationFloatWorkload::Execute(), ClFloorFloatWorkload::Execute(), ClReshapeWorkload::Execute(), ClResizeWorkload::Execute(), ClGatherWorkload::Execute(), ClSpaceToDepthWorkload::Execute(), ClInstanceNormalizationWorkload::Execute(), ClMaximumWorkload::Execute(), ClMinimumWorkload::Execute(), ClArgMinMaxWorkload::Execute(), ClChannelShuffleWorkload::Execute(), ClL2NormalizationFloatWorkload::Execute(), ClComparisonWorkload::Execute(), ClBatchMatMulWorkload::Execute(), ClSliceWorkload::Execute(), ClSpaceToBatchNdWorkload::Execute(), ClDepthToSpaceWorkload::Execute(), ClDivisionWorkload::Execute(), ClPooling2dWorkload::Execute(), ClPooling3dWorkload::Execute(), ClGatherNdWorkload::Execute(), ClStridedSliceWorkload::Execute(), ClBatchToSpaceNdWorkload::Execute(), ClMultiplicationWorkload::Execute(), ClPermuteWorkload::Execute(), ClScatterNdWorkload::Execute(), ClTransposeWorkload::Execute(), ClQuantizedLstmWorkload::Execute(), ClLogSoftmaxWorkload::Execute(), ClSoftmaxWorkload::Execute(), ClDepthwiseConvolutionWorkload::Execute(), ClBatchNormalizationFloatWorkload::Execute(), ClConvolution3dWorkload::Execute(), ClFullyConnectedWorkload::Execute(), ClTransposeConvolution2dWorkload::Execute(), and ClConvolution2dWorkload::Execute().

◆ ScatterNd() [1/2]

void ScatterNd ( const TensorInfo &  indicesInfo,
const TensorInfo &  updatesInfo,
const TensorInfo &  shapeInfo,
Decoder< int > &  indices,
Decoder< float > &  updates,
Decoder< int > &  shape,
Encoder< float > &  output,
ScatterNdDescriptor  descriptor 
)

Definition at line 181 of file ScatterNd.cpp.

189 {
190  // Axis Unsupported
191  if (descriptor.m_AxisEnabled)
192  {
193  throw InvalidArgumentException("ScatterNd: axis param not supported.");
194  }
195 
196  // Get the shape for indices, updates, and input
197  TensorShape indicesShape = indicesInfo.GetShape();
198  TensorShape updatesShape = updatesInfo.GetShape();
199 
200  // Get the shape values
201  std::vector<float> shapeValues = shape.DecodeTensor(shapeInfo.GetShape());
202  // Check the shape
203  if (shapeInfo.GetNumElements() == 0)
204  {
205  throw InvalidArgumentException("ScatterNd: shape must have values.");
206  }
207  for (auto shapeValue : shapeValues)
208  {
209  if (shapeValue <= 0)
210  {
211  throw InvalidArgumentException("ScatterNd: shape values must be > 0.");
212  }
213  }
214  // Get the input shape
215  std::vector<unsigned int> inputShape (shapeValues.begin(), shapeValues.end());
216  unsigned int inputElementsNum = static_cast<unsigned int>(
217  std::accumulate(inputShape.begin(), inputShape.end(), 1, std::multiplies<unsigned int>()));
218 
219  // Get the dimensions for indices and updates
220  unsigned int dimension = shapeInfo.GetNumElements();
221  unsigned int indicesDim = indicesInfo.GetNumDimensions();
222  unsigned int updatesDim = updatesInfo.GetNumDimensions();
223 
224  // Calculate the outter and inner dimensions
225  unsigned int outterDim = indicesShape[indicesDim - 1];
226  unsigned int innerDim = dimension - outterDim;
227 
228  // Calculate the number of elements in each dimension
229  unsigned int numElementsCount = 1;
230  std::vector<unsigned int> elementInDim(dimension);
231  for (unsigned int dimIndex = dimension; dimIndex > 0; --dimIndex)
232  {
233  elementInDim[dimIndex - 1] = numElementsCount;
234  numElementsCount *= inputShape[dimIndex - 1];
235  }
236 
237  // Number of updates per index
238  unsigned int numUpdatesPerIndex = elementInDim[dimension - innerDim - 1];
239 
240  // Number of indices to update
241  unsigned int numIndices = indicesShape[0];
242 
243  // Check Input Requirements
244  // Requirement 1: Indices and Updates must have rank at least 1
245  if (indicesDim < 1 || updatesDim < 1)
246  {
247  throw InvalidArgumentException("ScatterNd: indices and updates must have rank >= 1.");
248  }
249 
250  // Requirement 2: shape, Indices and Updates must have values
251  if (indicesInfo.GetNumElements() == 0 ||
252  updatesInfo.GetNumElements() == 0)
253  {
254  throw InvalidArgumentException("ScatterNd: indices and updates tensor must have values.");
255  }
256 
257  // Requirement 3: Indices and Updates must match in shape
258  // The updates dimension should equal 1 + the inner dimension
259  if (updatesDim != 1 + innerDim)
260  {
261  throw InvalidArgumentException("ScatterNd: updates dimension should equal to 1 + inner dimension.");
262  }
263  // The inner dimension of updates has to match with shape of input
264  for (unsigned int dimBackIndex = 0; dimBackIndex < innerDim; ++dimBackIndex)
265  {
266  if (updatesShape[updatesDim - dimBackIndex - 1] != inputShape[dimension - dimBackIndex - 1])
267  {
268  throw InvalidArgumentException(
269  fmt::format("ScatterNd: input and updates shape not match on dimension {}",
270  dimension - dimBackIndex));
271  }
272  }
273 
274  // Requirement 4: Check duplicate indices and out of bound indices
275  std::set<int> indicesSet;
276  std::vector<int> flattenIndices(numIndices);
277  for (unsigned int indicesIdx = 0; indicesIdx < numIndices; ++indicesIdx)
278  {
279  // Get the index
280  int flattenIndex = 0;
281 
282  for (unsigned int outterIdx = 0; outterIdx < outterDim; ++outterIdx) {
283 
284  int outterIndexValue = indices.Get();
285 
286  // Check bounds
287  if (outterIndexValue < 0 || outterIndexValue >= int(inputShape[outterIdx]))
288  {
289  throw InvalidArgumentException(
290  fmt::format("ScatterNd: indices {} out of bound [0, {})",
291  outterIndexValue, inputShape[outterIdx]));
292  }
293 
294  flattenIndex += int(elementInDim[outterIdx]) * outterIndexValue;
295  ++indices;
296  }
297 
298  // Check duplicates when executing ScatterNd::Update
299  if (descriptor.m_Function == ScatterNdFunction::Update &&
300  indicesSet.find(flattenIndex) != indicesSet.end())
301  {
302  throw InvalidArgumentException(
303  fmt::format("ScatterNd: duplicate indices {} occurs when executing ScatterNd::Update.",
304  flattenIndex));
305  }
306 
307  flattenIndices[indicesIdx] = flattenIndex;
308  indicesSet.insert(flattenIndex);
309  }
310 
311  // Set zeros to output
312  for (unsigned int idx = 0; idx < inputElementsNum; ++idx)
313  {
314  output.Set(0.0f);
315  ++output;
316  }
317 
318  // Iterate through all indices to scatter updates
319  for (unsigned int indicesIdx = 0; indicesIdx < numIndices; ++indicesIdx)
320  {
321  // Get the index and calculate the flatten index
322  int flattenIndex = flattenIndices[indicesIdx];
323 
324  // FlattenIndex is the place that we are going to update the elements
325  unsigned int updatesStartIdx = indicesIdx * numUpdatesPerIndex;
326  for (unsigned int updatesIdx = 0; updatesIdx < numUpdatesPerIndex; ++updatesIdx)
327  {
328  updates[updatesStartIdx + updatesIdx];
329  float updateValue = ScatterOperation(descriptor.m_Function, 0.0f, updates.Get());
330  output[static_cast<unsigned int>(flattenIndex) + updatesIdx];
331  output.Set(updateValue);
332  }
333  }
334 }

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), ScatterNdDescriptor::m_AxisEnabled, ScatterNdDescriptor::m_Function, ScatterOperation(), Encoder< IType >::Set(), and Update.
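
A worked sketch of the simplest ScatterNdFunction::Update case handled above: a hypothetical 1-D output shape {4}, indices {0, 2} and updates {9, 7} produce {9, 0, 7, 0} starting from a zero-filled output (plain C++, no ArmNN types):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const unsigned int shape = 4;                    // 1-D output shape {4}
        const std::vector<unsigned int> indices = { 0, 2 };
        const std::vector<float> updates = { 9.0f, 7.0f };

        std::vector<float> output(shape, 0.0f);          // output starts as zeros
        for (unsigned int i = 0; i < indices.size(); ++i)
        {
            output[indices[i]] = updates[i];             // ScatterNdFunction::Update semantics
        }

        for (float v : output) { std::printf("%g ", v); }
        std::printf("\n");                               // prints: 9 0 7 0
        return 0;
    }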

◆ ScatterNd() [2/2]

void ScatterNd ( const TensorInfo &  inputInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  updatesInfo,
Decoder< float > &  input,
Decoder< int > &  indices,
Decoder< float > &  updates,
Encoder< float > &  output,
ScatterNdDescriptor  descriptor 
)

Definition at line 41 of file ScatterNd.cpp.

49 {
50  // Axis Unsupported
51  if (descriptor.m_AxisEnabled)
52  {
53  throw InvalidArgumentException("ScatterNd: axis param not supported.");
54  }
55 
56  // Get the shape for indices, updates, and input
57  TensorShape indicesShape = indicesInfo.GetShape();
58  TensorShape updatesShape = updatesInfo.GetShape();
59  TensorShape inputShape = inputInfo.GetShape();
60 
61  // Get the dimensions for indices and updates
62  unsigned int dimension = inputInfo.GetNumDimensions();
63  unsigned int indicesDim = indicesInfo.GetNumDimensions();
64  unsigned int updatesDim = updatesInfo.GetNumDimensions();
65 
66  // Calculate the outter and inner dimensions
67  unsigned int outterDim = indicesShape[indicesDim - 1];
68  unsigned int innerDim = dimension - outterDim;
69 
70  // Calculate the number of elements in each dimension
71  unsigned int numElementsCount = 1;
72  std::vector<unsigned int> elementInDim(dimension);
73  for (unsigned int dimIndex = dimension; dimIndex > 0; --dimIndex)
74  {
75  elementInDim[dimIndex - 1] = numElementsCount;
76  numElementsCount *= inputShape[dimIndex - 1];
77  }
78 
79  // Number of updates per index
80  unsigned int numUpdatesPerIndex = elementInDim[dimension - innerDim - 1];
81 
82  // Number of indices to update
83  unsigned int numIndices = indicesShape[0];
84 
85  // Check Input Requirements
86  // Requirement 1: Indices and Updates must have rank at least 1
87  if (indicesDim < 1 || updatesDim < 1)
88  {
89  throw InvalidArgumentException("ScatterNd: indices and updates must have rank >= 1.");
90  }
91 
92  // Requirement 2: Input, Indices and Updates must have values
93  if (inputInfo.GetNumElements() == 0 ||
94  indicesInfo.GetNumElements() == 0 ||
95  updatesInfo.GetNumElements() == 0)
96  {
97  throw InvalidArgumentException("ScatterNd: input, indices and updates tensor must have values.");
98  }
99 
100  // Requirement 3: Indices and Updates must match in shape
101  // The updates dimension should equal 1 + the inner dimension
102  if (updatesDim != 1 + innerDim)
103  {
104  throw InvalidArgumentException("ScatterNd: updates dimension should equal to 1 + inner dimension.");
105  }
106  // The inner dimension of updates has to match with shape of input
107  for (unsigned int dimBackIndex = 0; dimBackIndex < innerDim; ++dimBackIndex)
108  {
109  if (updatesShape[updatesDim - dimBackIndex - 1] != inputShape[dimension - dimBackIndex - 1])
110  {
111  throw InvalidArgumentException(
112  fmt::format("ScatterNd: input and updates shape not match on dimension {}",
113  dimension - dimBackIndex));
114  }
115  }
116 
117  // Requirement 4: Check duplicate indices and out of bound indices
118  std::set<int> indicesSet;
119  std::vector<int> flattenIndices(numIndices);
120  for (unsigned int indicesIdx = 0; indicesIdx < numIndices; ++indicesIdx)
121  {
122  // Get the index
123  int flattenIndex = 0;
124 
125  for (unsigned int outterIdx = 0; outterIdx < outterDim; ++outterIdx) {
126 
127  int outterIndexValue = indices.Get();
128 
129  // Check bounds
130  if (outterIndexValue < 0 || outterIndexValue >= int(inputShape[outterIdx]))
131  {
132  throw InvalidArgumentException(
133  fmt::format("ScatterNd: indices {} out of bound [0, {})",
134  outterIndexValue, inputShape[outterIdx]));
135  }
136 
137  flattenIndex += int(elementInDim[outterIdx]) * outterIndexValue;
138  ++indices;
139  }
140 
141  // Check duplicates when executing ScatterNd::Update
142  if (descriptor.m_Function == ScatterNdFunction::Update &&
143  indicesSet.find(flattenIndex) != indicesSet.end())
144  {
145  throw InvalidArgumentException(
146  fmt::format("ScatterNd: duplicate indices occurs {}", flattenIndex));
147  }
148 
149  flattenIndices[indicesIdx] = flattenIndex;
150  indicesSet.insert(flattenIndex);
151  }
152 
153  // Set the input data to output
154  for (unsigned int idx = 0; idx < inputInfo.GetNumElements(); ++idx)
155  {
156  float inputValue = input.Get();
157  ++input;
158  output.Set(inputValue);
159  ++output;
160  }
161 
162  // Iterate through all indices to scatter updates
163  for (unsigned int indicesIdx = 0; indicesIdx < numIndices; ++indicesIdx)
164  {
165  // Get the index and calculate the flatten index
166  int flattenIndex = flattenIndices[indicesIdx];
167 
168  // FlattenIndex is the place that we are going to update the elements
169  unsigned int updatesStartIdx = indicesIdx * numUpdatesPerIndex;
170  for (unsigned int updatesIdx = 0; updatesIdx < numUpdatesPerIndex; ++updatesIdx)
171  {
172  updates[updatesStartIdx + updatesIdx];
173  input[static_cast<unsigned int>(flattenIndex) + updatesIdx];
174  float updateValue = ScatterOperation(descriptor.m_Function, input.Get(), updates.Get());
175  output[static_cast<unsigned int>(flattenIndex) + updatesIdx];
176  output.Set(updateValue);
177  }
178  }
179 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), ScatterNdDescriptor::m_AxisEnabled, ScatterNdDescriptor::m_Function, ScatterOperation(), Encoder< IType >::Set(), and Update.

◆ ScatterOperation()

float armnn::ScatterOperation ( ScatterNdFunction  operation,
float  input,
float  update 
)

Definition at line 18 of file ScatterNd.cpp.

21 {
22  switch (operation)
23  {
24  case ScatterNdFunction::Update:
25  return update;
26  case ScatterNdFunction::Add:
27  return input + update;
28  case ScatterNdFunction::Sub:
29  return input - update;
30  case ScatterNdFunction::Max:
31  return std::max(input, update);
32  case ScatterNdFunction::Min:
33  return std::min(input, update);
34  case ScatterNdFunction::Mul:
35  return input * update;
36  default:
37  throw InvalidArgumentException("ScatterNd: cannot execute this operation.");
38  }
39 }

References Add, Max, Min, Mul, Sub, and Update.

Referenced by ScatterNd().

◆ SelectTensorHandleStrategy()

OptimizationResult SelectTensorHandleStrategy ( Graph &  optGraph,
BackendsMap &  backends,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled,
bool  exportEnabled,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1821 of file Network.cpp.

1827 {
1828  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1829  OptimizationResult result;
1830 
1831  optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
1832  {
1833  // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
1834  // assignment if this check fails
1835  if (backends.find(layer->GetBackendId()) == backends.end())
1836  {
1837  throw armnn::Exception("Backend id not found for the layer");
1838  }
1839 
1840  // Check each output separately
1841  for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1842  {
1843  OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1844 
1845  ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
1846 
1847  // Calculate the factory to use which results in the fewest copies being made.
1848  switch(layer->GetType())
1849  {
1850  case LayerType::Input:
1851  slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1852  break;
1853  case LayerType::Output:
1854  slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1855  break;
1856  default:
1857  slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
1858  break;
1859  }
1860  outputSlot.SetTensorHandleFactory(slotOption);
1861 
1862  // Now determine the "best" edge strategy for each connection given the slotOption.
1863  unsigned int connectionIdx = 0;
1864  for (auto&& connection : outputSlot.GetConnections())
1865  {
1866  const Layer& connectedLayer = connection->GetOwningLayer();
1867 
1868  EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1869  registry, importEnabled);
1870 
1871  if (strategy == EdgeStrategy::Undefined)
1872  {
1873  result.m_Error = true;
1874  if (errMessages)
1875  {
1876  errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1877  " between backends.");
1878  }
1879  return;
1880  }
1881 
1882  outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1883 
1884  connectionIdx++;
1885  }
1886  }
1887  });
1888 
1889  return result;
1890 }

References ARMNN_SCOPED_PROFILING_EVENT, Graph::ForEachLayer(), Layer::GetBackendId(), OutputSlot::GetConnections(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, OutputSlot::SetEdgeStrategy(), OutputSlot::SetTensorHandleFactory(), and Undefined.

Referenced by Optimize().

◆ SetAllLoggingSinks()

void SetAllLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)

Definition at line 191 of file Logging.cpp.

192 {
193  SetLoggingSinks<LogSeverity::Trace>(standardOut, debugOut, coloured);
194  SetLoggingSinks<LogSeverity::Debug>(standardOut, debugOut, coloured);
195  SetLoggingSinks<LogSeverity::Info>(standardOut, debugOut, coloured);
196  SetLoggingSinks<LogSeverity::Warning>(standardOut, debugOut, coloured);
197  SetLoggingSinks<LogSeverity::Error>(standardOut, debugOut, coloured);
198  SetLoggingSinks<LogSeverity::Fatal>(standardOut, debugOut, coloured);
199 }

Referenced by ConfigureLogging(), and TEST_SUITE().
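
Applications normally reach these sink helpers indirectly through armnn::ConfigureLogging() (listed above under Referenced by). A minimal usage sketch, assuming the public signature ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity):

    #include <armnn/Logging.hpp>
    #include <armnn/Utils.hpp>

    int main()
    {
        // Log Info and above to standard output, uncoloured, no debug sink
        // (signature assumed as described in the lead-in above).
        armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);

        ARMNN_LOG(info) << "Logging configured";
        return 0;
    }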

◆ SetClSliceData()

auto SetClSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 100 of file ClWorkloadUtils.hpp.

102 {
103  // This function must translate the size vector given to an end vector
104  // expected by the ACL NESlice workload
105  arm_compute::Coordinates starts;
106  arm_compute::Coordinates ends;
107 
108  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
109 
110  // For strided slices, we have the relationship size = (end - begin) / stride
111  // For slice, we assume stride to be a vector of all ones, yielding the formula
112  // size = (end - begin) therefore we know end = size + begin
113  for (unsigned int i = 0; i < num_dims; i++)
114  {
115  unsigned int revertedIndex = num_dims - i - 1;
116 
117  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
118  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
119  }
120 
121  return std::make_tuple(starts, ends);
122 }

Referenced by ClSliceWorkload::ClSliceWorkload().
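
A plain C++ sketch of the begin/size to starts/ends translation above, with hypothetical begin = {1, 0, 2} and size = {2, 3, 4} and the coordinate order reversed as ACL expects (no arm_compute types):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<unsigned int> begin = { 1, 0, 2 };
        const std::vector<unsigned int> size  = { 2, 3, 4 };

        const unsigned int numDims = static_cast<unsigned int>(begin.size());
        std::vector<int> starts(numDims), ends(numDims);

        for (unsigned int i = 0; i < numDims; ++i)
        {
            const unsigned int reverted = numDims - i - 1;           // ACL expects reversed order
            starts[i] = static_cast<int>(begin[reverted]);
            ends[i]   = static_cast<int>(begin[reverted] + size[reverted]);   // end = begin + size
        }

        for (unsigned int i = 0; i < numDims; ++i)
        {
            std::printf("dim %u: start %d end %d\n", i, starts[i], ends[i]);
        }
        return 0;   // dim 0: start 2 end 6, dim 1: start 0 end 3, dim 2: start 1 end 3
    }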

◆ SetClStridedSliceData()

auto SetClStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 79 of file ClWorkloadUtils.hpp.

82 {
83  arm_compute::Coordinates starts;
84  arm_compute::Coordinates ends;
85  arm_compute::Coordinates strides;
86 
87  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
88 
89  for (unsigned int i = 0; i < num_dims; i++) {
90  unsigned int revertedIndex = num_dims - i - 1;
91 
92  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
93  ends.set(i, static_cast<int>(m_end[revertedIndex]));
94  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
95  }
96 
97  return std::make_tuple(starts, ends, strides);
98 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload().

◆ SetLogFilter()

void SetLogFilter ( LogSeverity  level)

Definition at line 73 of file Logging.cpp.

74 {
75  SimpleLogger<LogSeverity::Trace>::Get().Enable(false);
76  SimpleLogger<LogSeverity::Debug>::Get().Enable(false);
77  SimpleLogger<LogSeverity::Info>::Get().Enable(false);
78  SimpleLogger<LogSeverity::Warning>::Get().Enable(false);
79  SimpleLogger<LogSeverity::Error>::Get().Enable(false);
80  SimpleLogger<LogSeverity::Fatal>::Get().Enable(false);
81  switch (level)
82  {
83  case LogSeverity::Trace:
84  SimpleLogger<LogSeverity::Trace>::Get().Enable(true);
85  ARMNN_FALLTHROUGH;
86  case LogSeverity::Debug:
87  SimpleLogger<LogSeverity::Debug>::Get().Enable(true);
88  ARMNN_FALLTHROUGH;
89  case LogSeverity::Info:
90  SimpleLogger<LogSeverity::Info>::Get().Enable(true);
91  ARMNN_FALLTHROUGH;
92  case LogSeverity::Warning:
93  SimpleLogger<LogSeverity::Warning>::Get().Enable(true);
94  ARMNN_FALLTHROUGH;
95  case LogSeverity::Error:
96  SimpleLogger<LogSeverity::Error>::Get().Enable(true);
97  ARMNN_FALLTHROUGH;
98  case LogSeverity::Fatal:
99  SimpleLogger<LogSeverity::Fatal>::Get().Enable(true);
100  break;
101  default:
102  throw armnn::InvalidArgumentException("Unknown LoggingSeverity level.");
103  }
104 }

References ARMNN_FALLTHROUGH, Debug, SimpleLogger< Level >::Enable(), Error, Fatal, SimpleLogger< Level >::Get(), Info, Trace, and Warning.

Referenced by ConfigureLogging(), and TEST_SUITE().

◆ SetLoggingSinks()

void armnn::SetLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)
inline

Definition at line 167 of file Logging.cpp.

168 {
169  SimpleLogger<Level>::Get().RemoveAllSinks();
170 
171  if (standardOut)
172  {
173  if (coloured)
174  {
175  SimpleLogger<Level>::Get().AddSink(
176  std::make_shared<StandardOutputColourSink>(Level));
177  } else
178  {
179  SimpleLogger<Level>::Get().AddSink(
180  std::make_shared<StandardOutputSink>());
181  }
182  }
183 
184  if (debugOut)
185  {
186  SimpleLogger<Level>::Get().AddSink(
187  std::make_shared<DebugOutputSink>());
188  }
189 }

References SimpleLogger< Level >::AddSink(), SimpleLogger< Level >::Get(), and SimpleLogger< Level >::RemoveAllSinks().

◆ SetNeonSliceData()

auto armnn::SetNeonSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 160 of file NeonWorkloadUtils.hpp.

162 {
163  // This function must translate the size vector given to an end vector
164  // expected by the ACL NESlice workload
165  arm_compute::Coordinates starts;
166  arm_compute::Coordinates ends;
167 
168  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
169 
170  // For strided slices, we have the relationship size = (end - begin) / stride
171  // For slice, we assume stride to be a vector of all ones, yielding the formula
172  // size = (end - begin) therefore we know end = size + begin
173  for (unsigned int i = 0; i < num_dims; i++)
174  {
175  unsigned int revertedIndex = num_dims - i - 1;
176 
177  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
178  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
179  }
180 
181  return std::make_tuple(starts, ends);
182 }

Referenced by NeonSliceWorkload::NeonSliceWorkload().

◆ SetNeonStridedSliceData()

auto armnn::SetNeonStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 138 of file NeonWorkloadUtils.hpp.

141 {
142  arm_compute::Coordinates starts;
143  arm_compute::Coordinates ends;
144  arm_compute::Coordinates strides;
145 
146  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
147 
148  for (unsigned int i = 0; i < num_dims; i++)
149  {
150  unsigned int revertedIndex = num_dims - i - 1;
151 
152  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
153  ends.set(i, static_cast<int>(m_end[revertedIndex]));
154  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
155  }
156 
157  return std::make_tuple(starts, ends, strides);
158 }

Referenced by NeonStridedSliceWorkload::NeonStridedSliceWorkload().

◆ SetValueChecked()

◆ Slice()

void Slice ( const TensorInfo &  inputInfo,
const SliceDescriptor &  descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 14 of file Slice.cpp.

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const unsigned int numDims = inputShape.GetNumDimensions();
22 
23  constexpr unsigned int maxNumDims = 5;
24  if (descriptor.m_Begin.size() != numDims)
25  {
26  std::stringstream msg;
27  msg << "Slice: Number of dimensions (" << numDims <<
28  ") does not match the Begin vector in the descriptor (" << descriptor.m_Begin.size() << ")";
29  throw InvalidArgumentException(msg.str());
30  }
31  if (descriptor.m_Size.size() != numDims)
32  {
33  std::stringstream msg;
34  msg << "Slice: Number of dimensions (" << numDims <<
35  ") does not match the Size vector in the descriptor (" << descriptor.m_Size.size() << ")";
36  throw InvalidArgumentException(msg.str());
37  }
38  if (numDims > maxNumDims)
39  {
40  std::stringstream msg;
41  msg << "Slice: Number of dimensions (" << numDims <<
42  ") is greater than the maximum supported (" << maxNumDims << ")";
43  throw InvalidArgumentException(msg.str());
44  }
45 
46  std::vector<unsigned int> paddedInput(5);
47  std::vector<unsigned int> paddedBegin(5);
48  std::vector<unsigned int> paddedSize (5);
49 
50  const unsigned int numPaddingDims = maxNumDims - numDims;
51  for (unsigned int i = 0u; i < maxNumDims; ++i)
52  {
53  if (i < numPaddingDims)
54  {
55  paddedInput[i] = 1u;
56  paddedBegin[i] = 0u;
57  paddedSize[i] = 1u;
58  }
59  else
60  {
61  const unsigned int j = i - numPaddingDims;
62  paddedInput[i] = inputShape[j];
63  paddedBegin[i] = descriptor.m_Begin[j];
64  paddedSize[i] = descriptor.m_Size[j];
65  }
66  }
67 
68  unsigned int dim0 = paddedInput[0];
69  unsigned int dim1 = paddedInput[1];
70  unsigned int dim2 = paddedInput[2];
71  unsigned int dim3 = paddedInput[3];
72  unsigned int dim4 = paddedInput[4];
73 
74  unsigned int begin0 = paddedBegin[0];
75  unsigned int begin1 = paddedBegin[1];
76  unsigned int begin2 = paddedBegin[2];
77  unsigned int begin3 = paddedBegin[3];
78  unsigned int begin4 = paddedBegin[4];
79 
80  unsigned int size0 = paddedSize[0];
81  unsigned int size1 = paddedSize[1];
82  unsigned int size2 = paddedSize[2];
83  unsigned int size3 = paddedSize[3];
84  unsigned int size4 = paddedSize[4];
85 
86  if (begin0 + size0 > dim0)
87  {
88  std::stringstream msg;
89  msg << "Slice: begin0 + size0 (" << (begin0 + size0) <<
90  ") exceeds dim0 (" << dim0 << ")";
91  throw InvalidArgumentException(msg.str());
92  }
93  if (begin1 + size1 > dim1)
94  {
95  std::stringstream msg;
96  msg << "Slice: begin1 + size1 (" << (begin1 + size1) <<
97  ") exceeds dim1 (" << dim1 << ")";
98  throw InvalidArgumentException(msg.str());
99  }
100  if (begin2 + size2 > dim2)
101  {
102  std::stringstream msg;
103  msg << "Slice: begin2 + size2 (" << (begin2 + size2) <<
104  ") exceeds dim2 (" << dim2 << ")";
105  throw InvalidArgumentException(msg.str());
106  }
107  if (begin3 + size3 > dim3)
108  {
109  std::stringstream msg;
110  msg << "Slice: begin3 + size3 (" << (begin3 + size3) <<
111  ") exceeds dim3 (" << dim3 << ")";
112  throw InvalidArgumentException(msg.str());
113  }
114 
115  if (inputData == nullptr)
116  {
117  throw armnn::NullPointerException("Slice: Null inputData pointer");
118  }
119  if (outputData == nullptr)
120  {
121  throw armnn::NullPointerException("Slice: Null outputData pointer");
122  }
123 
124  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
125  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
126 
127  for (unsigned int idx0 = begin0; idx0 < begin0 + size0; ++idx0)
128  {
129  for (unsigned int idx1 = begin1; idx1 < begin1 + size1; ++idx1)
130  {
131  for (unsigned int idx2 = begin2; idx2 < begin2 + size2; ++idx2)
132  {
133  for (unsigned int idx3 = begin3; idx3 < begin3 + size3; ++idx3)
134  {
135  for (unsigned int idx4 = begin4; idx4 < begin4 + size4; ++idx4)
136  {
137  const unsigned int inputOffset =
138  ((((idx0 * dim1 + idx1) * dim2 + idx2) * dim3 + idx3) * dim4 + idx4) * dataTypeSize;
139 
140  ::memcpy(output, input + inputOffset, dataTypeSize);
141  output += dataTypeSize;
142  }
143  }
144  }
145  }
146  }
147 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), SliceDescriptor::m_Begin, and SliceDescriptor::m_Size.
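
A worked sketch of Slice on a hypothetical [3, 4] tensor with m_Begin = {1, 1} and m_Size = {2, 2}, copying rows 1..2 and columns 1..2 element-wise rather than via memcpy (plain C++, no ArmNN types):

    #include <cstdio>

    int main()
    {
        // Input tensor of shape [3, 4].
        const float input[3][4] = { {  0.f,  1.f,  2.f,  3.f },
                                    {  4.f,  5.f,  6.f,  7.f },
                                    {  8.f,  9.f, 10.f, 11.f } };

        const unsigned int begin0 = 1, begin1 = 1;   // m_Begin
        const unsigned int size0  = 2, size1  = 2;   // m_Size

        float output[2][2];
        for (unsigned int i0 = begin0; i0 < begin0 + size0; ++i0)
        {
            for (unsigned int i1 = begin1; i1 < begin1 + size1; ++i1)
            {
                output[i0 - begin0][i1 - begin1] = input[i0][i1];   // copy the selected window
            }
        }

        std::printf("%g %g\n%g %g\n", output[0][0], output[0][1], output[1][0], output[1][1]);
        return 0;   // prints: 5 6 / 9 10
    }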

◆ Softmax()

void Softmax ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo &  inputTensorInfo,
float  beta,
int  axis 
)

Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.

Definition at line 17 of file Softmax.cpp.

18 {
19  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(axis < static_cast<int>(inputTensorInfo.GetNumDimensions()),
20  "Required axis index greater than number of dimensions.");
21  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(axis >= -static_cast<int>(inputTensorInfo.GetNumDimensions()),
22  "Required axis index lower than negative of the number of dimensions");
23 
24  unsigned int uAxis = axis < 0 ?
25  inputTensorInfo.GetNumDimensions() - static_cast<unsigned int>(abs(axis))
26  : static_cast<unsigned int>(axis);
27 
28  const TensorShape& inputShape = inputTensorInfo.GetShape();
29  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
30  const unsigned int axisSize = inputShape[uAxis];
31  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
32  uAxis + 1,
33  inputShape.GetNumDimensions());
34 
35  for (unsigned int outer = 0; outer < outerSize; ++outer)
36  {
37  unsigned int inputBeginIdx = outer * axisSize * innerSize;
38  unsigned int inputEndIdx = inputBeginIdx + axisSize * innerSize;
39  unsigned int outputBeginIdx = outer * axisSize * innerSize;
40 
41  for (unsigned int inner = 0; inner < innerSize; ++inner, ++inputBeginIdx, ++inputEndIdx, ++outputBeginIdx)
42  {
43  // Find max
44  float maxValue = std::numeric_limits<float>::lowest();
45  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
46  {
47  in[iter];
48  maxValue = std::max(maxValue, in.Get());
49  }
50 
51  // Compute sum
52  float sum = 0.0f;
53  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
54  {
55  in[iter];
56  sum += std::exp((in.Get() - maxValue) * beta);
57  }
58 
59  // Compute result
60  unsigned int outputIter = outputBeginIdx;
61  out[outputIter];
62  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize, outputIter += innerSize)
63  {
64  out[outputIter];
65  in[iter];
66  out.Set(std::exp((in.Get() - maxValue) * beta) / sum);
67  }
68  }
69  }
70 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), and Encoder< IType >::Set().
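
Ignoring the Decoder/Encoder indexing and the outer/inner strides, the per-slice arithmetic above amounts to a numerically stable softmax. A minimal sketch on plain floats (the helper name is illustrative, not part of ArmNN):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Numerically stable softmax over one 1-D slice; assumes a non-empty slice.
    std::vector<float> SoftmaxSlice(const std::vector<float>& in, float beta)
    {
        const float maxValue = *std::max_element(in.begin(), in.end());   // subtract the max for stability
        float sum = 0.0f;
        for (float v : in)
        {
            sum += std::exp((v - maxValue) * beta);
        }
        std::vector<float> out(in.size());
        for (std::size_t i = 0; i < in.size(); ++i)
        {
            out[i] = std::exp((in[i] - maxValue) * beta) / sum;
        }
        return out;
    }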

◆ SpaceToBatchNd()

void SpaceToBatchNd ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToBatchNdDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 48 of file SpaceToBatchNd.cpp.

53 {
54  unsigned int rank = inputInfo.GetNumDimensions();
55  if (rank != 3 && rank != 4 )
56  {
57  throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
58  CHECK_LOCATION());
59  }
60 
61  DataLayoutIndexed dataLayout = params.m_DataLayout;
62  unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
63 
64  const TensorShape& inputShape = inputInfo.GetShape();
65  const TensorShape& outputShape = outputInfo.GetShape();
66 
67  const unsigned int inputBatchSize = inputShape[0];
68  const unsigned int outputBatchSize = outputShape[0];
69 
70  const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
71  : inputShape[dataLayout.GetChannelsIndex()];
72 
73  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
74  const unsigned int inputWidth = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
75  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
76  const unsigned int outputWidth = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
77 
78  const unsigned int blockHeight = params.m_BlockShape[0];
79  const unsigned int blockWidth = (rank == 3) ? 1 : params.m_BlockShape[1];
80 
81  const unsigned int paddingTop = params.m_PadList[0].first;
82  const unsigned int paddingLeft = (rank == 3) ? 0 : params.m_PadList[1].first;
83 
84  for (unsigned int outB = 0; outB < outputBatchSize; ++outB)
85  {
86  unsigned int inB = outB % inputBatchSize;
87 
88  unsigned int shiftW = (outB / inputBatchSize) % blockWidth;
89  unsigned int shiftH = (outB / inputBatchSize) / blockWidth;
90 
91  for (unsigned int outH = 0; outH < outputHeight; ++outH)
92  {
93  for (unsigned int outW = 0; outW < outputWidth; ++outW)
94  {
95  if (outH * blockHeight + shiftH < paddingTop ||
96  outH * blockHeight + shiftH >= paddingTop + inputHeight ||
97  outW * blockWidth + shiftW < paddingLeft ||
98  outW * blockWidth + shiftW >= paddingLeft + inputWidth)
99  {
100  for (unsigned int c = 0; c < channels; c++)
101  {
102  unsigned int outOffset = GetOffset(outputShape,
103  outB,
104  outH,
105  outW,
106  c,
107  dataLayout);
108  outputData += outOffset;
109  outputData.Set(0);
110  outputData -= outOffset;
111  }
112  }
113  else
114  {
115  for (unsigned int c = 0; c < channels; c++)
116  {
117  unsigned int inOffset = GetOffset(inputShape,
118  inB,
119  (outH * blockHeight + shiftH) - paddingTop,
120  (outW * blockWidth + shiftW) - paddingLeft,
121  c,
122  dataLayout);
123 
124  unsigned int outOffset = GetOffset(outputShape,
125  outB,
126  outH,
127  outW,
128  c,
129  dataLayout);
130 
131  outputData += outOffset;
132  inputData += inOffset;
133  outputData.Set(inputData.Get());
134  inputData -= inOffset;
135  outputData -= outOffset;
136  }
137  }
138  }
139  }
140  }
141 }

References CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToBatchNdDescriptor::m_BlockShape, SpaceToBatchNdDescriptor::m_DataLayout, SpaceToBatchNdDescriptor::m_PadList, Encoder< IType >::Set(), and SpaceToBatchNd().

Referenced by SpaceToBatchNd(), and SpaceToBatchNdLayer::SpaceToBatchNdLayer().
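
The loop above decomposes each output batch index into the original batch plus a (height, width) shift inside the block. A small sketch of that mapping (names are illustrative, not part of ArmNN):

    // Each output batch index encodes the original batch plus a shift within the block.
    struct BatchMapping
    {
        unsigned int inB;
        unsigned int shiftH;
        unsigned int shiftW;
    };

    BatchMapping DecomposeOutputBatch(unsigned int outB, unsigned int inputBatchSize, unsigned int blockWidth)
    {
        BatchMapping m;
        m.inB    = outB % inputBatchSize;
        m.shiftW = (outB / inputBatchSize) % blockWidth;
        m.shiftH = (outB / inputBatchSize) / blockWidth;
        return m;
    }

    // Example: with inputBatchSize = 1 and blockWidth = 2, output batch 3 maps to
    // inB = 0, shiftW = 1, shiftH = 1 (the bottom-right cell of a 2x2 block).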

◆ SpaceToDepth()

void SpaceToDepth ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToDepthDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 36 of file SpaceToDepth.cpp.

41 {
42  DataLayoutIndexed dataLayout = params.m_DataLayout;
43 
44  const TensorShape& inputShape = inputInfo.GetShape();
45  const TensorShape& outputShape = outputInfo.GetShape();
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
49 
50  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
51  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
52  const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int blockSize = params.m_BlockSize;
55 
56  if (blockSize == 0)
57  {
59  "Input shape must be divisible by block size in all spatial dimensions: Block size is"
60  " equal to zero");
61  }
62 
63  for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
64  {
65  unsigned int inChannelIndex = outChannelIndex % inputChannels;
66 
67  unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
68  unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
69 
70  for (unsigned int outH = 0; outH < outputHeight; outH++)
71  {
72  for (unsigned int outW = 0; outW < outputWidth; outW++)
73  {
74  for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
75  {
76  unsigned int inOffset = GetOffset(inputShape,
77  inChannelIndex,
78  (outH * blockSize + shiftH),
79  (outW * blockSize + shiftW),
80  inBatchIndex,
81  dataLayout);
82 
83  unsigned int outOffset = GetOffset(outputShape,
84  outChannelIndex,
85  outH,
86  outW,
87  inBatchIndex,
88  dataLayout);
89 
90  outputData += outOffset;
91  inputData += inOffset;
92  outputData.Set(inputData.Get());
93  inputData -= inOffset;
94  outputData -= outOffset;
95  }
96  }
97  }
98  }
99 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, Encoder< IType >::Set(), and SpaceToDepth().

Referenced by SpaceToDepth(), and SpaceToDepthLayer::SpaceToDepthLayer().
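
The output channel index above encodes both the source channel and the element's position within the block. A sketch of the coordinate mapping (names are illustrative, not part of ArmNN):

    // For a given output element, SpaceToDepth reads from input coordinates derived from
    // the output channel's position within the block.
    struct SrcCoords
    {
        unsigned int inChannel;
        unsigned int inH;
        unsigned int inW;
    };

    SrcCoords SpaceToDepthSource(unsigned int outChannel, unsigned int outH, unsigned int outW,
                                 unsigned int inputChannels, unsigned int blockSize)
    {
        const unsigned int inChannel = outChannel % inputChannels;
        const unsigned int shiftW    = (outChannel / inputChannels) % blockSize;
        const unsigned int shiftH    = (outChannel / inputChannels) / blockSize;
        return { inChannel, outH * blockSize + shiftH, outW * blockSize + shiftW };
    }

    // Example: inputChannels = 1, blockSize = 2, output channel 3 at (outH, outW) = (0, 0)
    // reads input channel 0 at (inH, inW) = (1, 1).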

◆ Split()

void Split ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 20 of file Splitter.cpp.

23 {
24  const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
25 
26  std::unique_ptr<Decoder<float>> decoderPtr =
27  MakeDecoder<float>(inputInfo, inputs[0]->Map());
28  Decoder<float>& decoder = *decoderPtr;
29 
30  for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
31  {
32  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
33 
34  unsigned int indexRemainder = index;
35  unsigned int dimensionStride = inputInfo.GetNumElements();
36 
37  for (unsigned int i = 0; i<inputInfo.GetNumDimensions(); i++)
38  {
39  dimensionStride /= inputInfo.GetShape()[i];
40  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
41  indexRemainder -= indices[i] * dimensionStride;
42  }
43 
44  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
45  {
46  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
47 
48  // Split view extents are defined by the size of (the corresponding) output tensor.
49  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
50  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
51  outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions(),
52  "The number of output dimensions does not match the number of input dimensions.");
53 
54  // Check all dimensions to see if this element is inside the given input view.
55  bool insideView = true;
56  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
57  {
58  if (indices[i] < view.m_Origin[i])
59  {
60  insideView = false;
61  }
62  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
63  {
64  insideView = false;
65  }
66  }
67 
68  if (insideView)
69  {
70  std::unique_ptr<Encoder<float>> encoderPtr =
71  MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
72  Encoder<float>& encoder = *encoderPtr;
73 
74  unsigned int outIndex = 0;
75  unsigned int dimensionStride = 1;
76  float inputValue = 0.f;
77 
78  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
79  {
80  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
81  dimensionStride *= outputInfo.GetShape()[i];
82  }
83 
84  decoder += index;
85  inputValue = decoder.Get();
86  decoder -= index;
87 
88  encoder += outIndex;
89  encoder.Set(inputValue);
90  break;
91  }
92  }
93  }
94 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().
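
Lines 34-42 above convert a flat element index into per-dimension coordinates by repeated division with the shrinking stride. A standalone sketch of that decomposition (the helper is illustrative, not part of ArmNN):

    #include <cstddef>
    #include <vector>

    // Convert a linear element index into per-dimension coordinates.
    std::vector<unsigned int> FlatIndexToCoords(unsigned int index, const std::vector<unsigned int>& shape)
    {
        std::vector<unsigned int> coords(shape.size());
        unsigned int stride = 1;
        for (unsigned int dim : shape)
        {
            stride *= dim;                      // total number of elements
        }
        for (std::size_t i = 0; i < shape.size(); ++i)
        {
            stride /= shape[i];
            coords[i] = index / stride;         // integer division rounds down
            index -= coords[i] * stride;
        }
        return coords;
    }

    // Example: index 7 in a {2, 3, 2} tensor decomposes to coordinates {1, 0, 1}.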

◆ Splitter()

void armnn::Splitter ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 17 of file Splitter.hpp.

20 {
21  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
22 
23  for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = inputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= inputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  // Split view extents are defined by the size of (the corresponding) output tensor.
42  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
43  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(
44  outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions(),
45  "The number of output dimensions does not match the number of input dimensions.");
46 
47  // Check all dimensions to see if this element is inside the given input view.
48  bool insideView = true;
49  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
50  {
51  if (indices[i] < view.m_Origin[i])
52  {
53  insideView = false;
54  }
55  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
56  {
57  insideView = false;
58  }
59  }
60 
61  if (insideView)
62  {
63  unsigned int outIndex = 0;
64  unsigned int dimensionStride = 1;
65 
66  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
67  {
68  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
69  dimensionStride *= outputInfo.GetShape()[i];
70  }
71 
72  // We are within the view, so copy the input data to the output corresponding to this view.
73  DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
74  const DataType* inputData = GetInputTensorData<DataType>(0, data);
75  outputData[outIndex] = inputData[index];
76  }
77  }
78  }
79 }

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.
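
The outIndex computation above is the reverse mapping: the element's coordinates minus the view origin, re-flattened against the view's own shape. A sketch (the helper is illustrative, not part of ArmNN):

    #include <cstddef>
    #include <vector>

    // Given an element's coordinates, a view origin and the view's shape, compute the
    // flat index inside that view's output tensor.
    unsigned int CoordsToViewIndex(const std::vector<unsigned int>& coords,
                                   const std::vector<unsigned int>& viewOrigin,
                                   const std::vector<unsigned int>& viewShape)
    {
        unsigned int outIndex = 0;
        unsigned int stride = 1;
        for (std::size_t i = coords.size(); i-- > 0;)   // innermost dimension first
        {
            outIndex += stride * (coords[i] - viewOrigin[i]);
            stride *= viewShape[i];
        }
        return outIndex;
    }

    // Example: coordinates {1, 2} with view origin {1, 0} and view shape {1, 4} give index 2.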

◆ Stack()

void Stack ( const StackQueueDescriptor data,
std::vector< std::unique_ptr< Decoder< float >>> &  inputs,
Encoder< float > &  output,
const TensorInfo inputInfo,
const TensorInfo outputInfo 
)

Definition at line 12 of file Stack.cpp.

17 {
18  unsigned int outputNumDims = outputInfo.GetNumDimensions();
19  unsigned int inputNumDims = inputInfo.GetNumDimensions();
20 
21  const armnn::TensorShape& outputDims = outputInfo.GetShape();
22  const armnn::TensorShape& inputDims = inputInfo.GetShape();
23 
24  unsigned int axis = data.m_Parameters.m_Axis;
25 
26  // Can perform a simple concatenation when axis == 0
27  if (!axis)
28  {
29  unsigned int numInputs = data.m_Parameters.m_NumInputs;
30  unsigned int inputLength = inputInfo.GetNumElements();
31 
32  for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
33  {
34  for (unsigned int elmt=0; elmt<inputLength; ++elmt)
35  {
36  (*inputs[inputIdx])[elmt];
37  output[(inputIdx * inputLength) + elmt];
38  output.Set(inputs[inputIdx]->Get());
39  }
40  }
41  return;
42  }
43 
44  const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
45  const unsigned int iBatchSize = inputDims[0];
46  const unsigned int iChannels = (inputNumDims > 1) ? inputDims[1] : 1;
47  const unsigned int iHeight = (inputNumDims > 2) ? inputDims[2] : 1;
48  const unsigned int iWidth = (inputNumDims > 3) ? inputDims[3] : 1;
49 
50  const unsigned int oBatchSize = outputDims[1];
51  const unsigned int oChannels = (outputNumDims > 2) ? outputDims[2] : 1;
52  const unsigned int oHeight = (outputNumDims > 3) ? outputDims[3] : 1;
53  const unsigned int oWidth = (outputNumDims > 4) ? outputDims[4] : 1;
54 
55  // Array to store the input coordinates
56  // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
57  // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
58  // iCoordinates[5] will always be zero and is used to avoid incrementing
59  // the output when the input has fewer than 4 dimensions
60  std::array<unsigned int, 6> iCoordinates{ 0 };
61 
62  // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
63  // This array is initialized with &iCoordinates[5] since this will always be zero
64  std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
65  &iCoordinates[5],
66  &iCoordinates[5],
67  &iCoordinates[5],
68  &iCoordinates[5] };
69 
70  // Set the axis coordinate
71  oCoordinates[axis] = &iCoordinates[0];
72 
73  // Map the output coordinates, accounting for the axis
74  unsigned int dim_shift = 0;
75  for(unsigned int dim = 0; dim < inputNumDims; ++dim)
76  {
77  if(dim == axis)
78  {
79  dim_shift++;
80  }
81  oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
82  }
83 
84  // Alias for the input coordinates
85  unsigned int &i = iCoordinates[0];
86  unsigned int &bi = iCoordinates[1];
87  unsigned int &ci = iCoordinates[2];
88  unsigned int &hi = iCoordinates[3];
89  unsigned int &wi = iCoordinates[4];
90 
91  // Alias for the output coordinates
92  unsigned int &o = *(oCoordinates[0]);
93  unsigned int &bo = *(oCoordinates[1]);
94  unsigned int &co = *(oCoordinates[2]);
95  unsigned int &ho = *(oCoordinates[3]);
96  unsigned int &wo = *(oCoordinates[4]);
97 
98  // Stack tensors
99  for(; i < iNumTensors; ++(i))
100  {
101  for(bi = 0; bi < iBatchSize; ++(bi))
102  {
103  for(ci = 0; ci < iChannels; ++(ci))
104  {
105  for(hi = 0; hi < iHeight; ++(hi))
106  {
107  for(wi = 0; wi < iWidth; ++(wi))
108  {
109  output[o * oWidth * oHeight * oChannels * oBatchSize +
110  bo * oWidth * oHeight * oChannels +
111  co * oWidth * oHeight +
112  ho * oWidth +
113  wo];
114 
115  output.Set(inputs[i]->Get());
116 
117  ++(*(inputs[i]));
118  }
119  }
120  }
121  }
122  }
123 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), StackDescriptor::m_Axis, QueueDescriptor::m_Inputs, StackDescriptor::m_NumInputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().
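
For axis == 0 the function reduces to plain concatenation of the flattened inputs, as the early-return branch above shows. A minimal sketch of that fast path on std::vector<float> (illustrative only, not part of ArmNN):

    #include <vector>

    // Stacking N equally sized inputs along a new leading axis is a plain concatenation
    // of their flattened contents.
    std::vector<float> StackAxis0(const std::vector<std::vector<float>>& inputs)
    {
        std::vector<float> output;
        for (const auto& input : inputs)
        {
            output.insert(output.end(), input.begin(), input.end());
        }
        return output;
    }

    // Example: stacking {1, 2} and {3, 4} along axis 0 yields a 2x2 tensor stored as {1, 2, 3, 4}.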

◆ StrEqual()

constexpr bool armnn::StrEqual ( const char *  strA,
const char(&)  strB[N] 
)
constexpr

Definition at line 201 of file TypesUtils.hpp.

202 {
203  bool isEqual = true;
204  for (unsigned i = 0; isEqual && (i < N); ++i)
205  {
206  isEqual = (strA[i] == strB[i]);
207  }
208  return isEqual;
209 }

Referenced by ParseComputeDevice().
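
Because the function is constexpr, it can compare a string against a literal at compile time; note that at most N characters are examined, where N includes strB's terminating null. A usage sketch:

    // Compile-time comparison against a literal (evaluates to true).
    static_assert(armnn::StrEqual("CpuRef", "CpuRef"), "expected literal match");

    // Runtime use: false because the strings differ at the first character.
    bool sameBackend = armnn::StrEqual("GpuAcc", "CpuRef");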

◆ StridedSlice()

void StridedSlice ( const TensorInfo inputInfo,
const StridedSliceDescriptor params,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 86 of file StridedSlice.cpp.

91 {
92  if (inputData == nullptr)
93  {
94  throw armnn::InvalidArgumentException("Slice: Null inputData pointer");
95  }
96  if (outputData == nullptr)
97  {
98  throw armnn::InvalidArgumentException("Slice: Null outputData pointer");
99  }
100 
101  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
102  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
103 
104  const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
105 
106  StridedSliceDescriptor paddedParams = params;
107 
108  // Pad parameters to 4 dimensions
109  PadParams(paddedParams, 4);
110 
111  // Arrays containing the start and stop index for each axis (adjusted by set params/flags)
112  int startArray [4] = {0};
113  int stopArray [4] = {0};
114 
115  // Getting paddedParams stop and start values for each axis
116  for(unsigned int i = 0; i < 4; ++i)
117  {
118  startArray[i] = paddedParams.GetStartForAxis(inputShape, i);
119  stopArray[i] = paddedParams.GetStopForAxis(inputShape, i, startArray[i]);
120  }
121 
122  // Adjusting the EllipsisMask based on the NewAxisMask
123  // (if NewAxisMask extends an axis, the ellipsis flag is extended as well)
124  if(paddedParams.m_NewAxisMask > 0 && paddedParams.m_EllipsisMask > 0)
125  {
126  // Iterate until the current EllipsisMask 1-bit found
127  for(unsigned int i = 0; i < 4; ++i)
128  {
129  // If EllipsisMask bit found, adjust based on NewAxisMask and exit loop
130  if(paddedParams.m_EllipsisMask & (1 << i) && !(paddedParams.m_NewAxisMask & (1 << i)))
131  {
132  // If the previous bit is the NewAxisMask, set the EllipsisMask there
133  // (this condition was determined based on the unit tests expected data)
134  if(paddedParams.m_NewAxisMask & (1 << (i-1)))
135  {
136  paddedParams.m_EllipsisMask |= (1 << (i-1));
137  }
138  // Otherwise, extend the EllipsisMask by one bit
139  else
140  {
141  paddedParams.m_EllipsisMask |= (1 << (i+1));
142  }
143  break;
144  }
145  }
146  }
147 
148  // Processing start and stop values based on the EllipsisMask and NewAxisMask
149  for(unsigned int i = 0, dimIdx = 0; i < 4; ++i)
150  {
151  // If the EllipsisMask is set, extend the start/stop to the input dimension size
152  if(paddedParams.m_EllipsisMask & (1 << dimIdx))
153  {
154  startArray[i] = 0;
155  stopArray[i] = armnn::numeric_cast<int>(inputShape[i]);
156  }
157  // Otherwise, if the NewAxisMask is set, shift all following start/stop values to the left
158  else if(paddedParams.m_NewAxisMask & (1 << dimIdx))
159  {
160  // Increment dimIdx - skip the current dimension for which NewAxisMask is set
161  ++dimIdx;
162  }
163 
164  // If the index of the currently processed dimension is higher than
165  // the index of the current start/stop array position, shift start/stop values
166  if(dimIdx > i && !(paddedParams.m_EllipsisMask & (1 << dimIdx)))
167  {
168  if(dimIdx < 4)
169  {
170  startArray[i] = startArray[dimIdx];
171  stopArray[i] = stopArray[dimIdx];
172  }
173  else
174  {
175  // If dimIdx is greater than the amount of available dimensions,
176  // instead of shifting the next ones, create new start/stop values
177  if(paddedParams.m_EllipsisMask > 0)
178  {
179  // The new values are 0,1 if there is an EllipsisMask bit present
180  startArray[i] = 0;
181  stopArray[i] = 1;
182  }
183  else
184  {
185  // Otherwise, select the entire inputTensor dimension size
186  startArray[i] = 0;
187  stopArray[i] = armnn::numeric_cast<int>(inputShape[i]);
188  }
189  }
190  }
191  ++dimIdx;
192  }
193 
194  const int step = armnn::numeric_cast<int>(dataTypeSize);
195 
196  for (int in0 = startArray[0];
197  !LoopCondition(in0, stopArray[0], paddedParams.m_Stride[0]);
198  in0 += paddedParams.m_Stride[0])
199  {
200  for (int in1 = startArray[1];
201  !LoopCondition(in1, stopArray[1], paddedParams.m_Stride[1]);
202  in1 += paddedParams.m_Stride[1])
203  {
204  for (int in2 = startArray[2];
205  !LoopCondition(in2, stopArray[2], paddedParams.m_Stride[2]);
206  in2 += paddedParams.m_Stride[2])
207  {
208  for (int in3 = startArray[3];
209  !LoopCondition(in3, stopArray[3], paddedParams.m_Stride[3]);
210  in3 += paddedParams.m_Stride[3])
211  {
212  int dim1 = armnn::numeric_cast<int>(inputShape[1]);
213  int dim2 = armnn::numeric_cast<int>(inputShape[2]);
214  int dim3 = armnn::numeric_cast<int>(inputShape[3]);
215 
216  int inputOffset = (((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3) * step;
217  ::memcpy(output, input + inputOffset, dataTypeSize);
218  output += step;
219  }
220  }
221  }
222  }
223 }

References TensorInfo::GetShape(), StridedSliceDescriptor::GetStartForAxis(), StridedSliceDescriptor::GetStopForAxis(), StridedSliceDescriptor::m_EllipsisMask, StridedSliceDescriptor::m_NewAxisMask, and StridedSliceDescriptor::m_Stride.
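
Each of the four nested loops above walks one axis from its adjusted start towards its stop value in steps of the configured stride, with LoopCondition terminating the walk once the index passes the stop value in the direction of the stride (negative strides walk backwards). A single-axis sketch of that behaviour, under that assumption (names are illustrative, not part of ArmNN):

    #include <vector>

    // Walk one axis with a non-zero stride, stopping once the index passes 'stop'
    // in the direction of travel.
    std::vector<int> WalkAxis(int start, int stop, int stride)
    {
        std::vector<int> visited;
        for (int i = start; (stride > 0) ? (i < stop) : (i > stop); i += stride)
        {
            visited.push_back(i);
        }
        return visited;
    }

    // Example: start = 0, stop = 6, stride = 2 visits {0, 2, 4};
    //          start = 5, stop = -1, stride = -2 visits {5, 3, 1}.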

◆ StringToLogLevel()

LogSeverity armnn::StringToLogLevel ( std::string  level)
inline

Definition at line 43 of file Logging.hpp.

44 {
45  // Convert to lower case
46  std::transform(level.begin(), level.end(), level.begin(),
47  [](unsigned char c){ return std::tolower(c); }
48  );
49 
50  if (level == "trace")
51  {
52  return LogSeverity::Trace;
53  }
54  else if (level == "debug")
55  {
56  return LogSeverity::Debug;
57  }
58  else if (level == "info")
59  {
60  return LogSeverity::Info;
61  }
62  else if (level == "warning")
63  {
64  return LogSeverity::Warning;
65  }
66  else if (level == "error")
67  {
68  return LogSeverity::Error;
69  }
70  else if (level == "fatal")
71  {
72  return LogSeverity::Fatal;
73  }
74  else
75  {
76  throw armnn::Exception("Unknown severity level for logging: '" + level +
77  "'. Valid options: trace, debug, info, warning, error, fatal");
78  }
79 }

References Debug, Error, Fatal, Info, Trace, and Warning.
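
A short usage sketch; the match is case-insensitive because the input is lower-cased first:

    // "Warning" is lower-cased to "warning" before matching.
    armnn::LogSeverity level = armnn::StringToLogLevel("Warning");   // LogSeverity::Warning

    // An unrecognised string such as "verbose" throws armnn::Exception listing the valid options.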

◆ swap() [1/2]

void armnn::swap ( OriginsDescriptor first,
OriginsDescriptor second 
)

Definition at line 357 of file Descriptors.cpp.

358 {
359  using std::swap;
360  swap(first.m_NumViews, second.m_NumViews);
361  swap(first.m_NumDimensions, second.m_NumDimensions);
362  swap(first.m_ViewOrigins, second.m_ViewOrigins);
363  swap(first.m_ConcatAxis, second.m_ConcatAxis);
364 }

References swap().

Referenced by BackendId::operator=(), SquashEqualSiblingsImpl< Comparable >::Run(), BackendRegistry::Swap(), and swap().

◆ swap() [2/2]

void armnn::swap ( ViewsDescriptor first,
ViewsDescriptor second 
)

Definition at line 366 of file Descriptors.cpp.

367 {
368  using std::swap;
369  swap(first.m_Origins, second.m_Origins);
370  swap(first.m_ViewSizes, second.m_ViewSizes);
371  swap(first.m_IsAxisSet, second.m_IsAxisSet);
372  swap(first.m_Axis, second.m_Axis);
373 }

References swap().

Referenced by swap().

◆ Tile()

void Tile ( const TileDescriptor params,
const TensorInfo inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 45 of file Tile.cpp.

49 {
50  // Input and output will always have the same rank
51  uint32_t rank = inputInfo.GetNumDimensions();
52 
53  TensorShape inputShape = inputInfo.GetShape();
54 
55  std::vector<uint32_t> outputShape(rank);
56  for (uint32_t i = 0; i < rank; ++i)
57  {
58  outputShape[i] = inputShape[i] * params.m_Multiples[i];
59  }
60 
61  // If all values of multiples are 1, then return the input
62  if ( std::adjacent_find( params.m_Multiples.begin(), params.m_Multiples.end(),
63  std::not_equal_to<>() ) == params.m_Multiples.end() && params.m_Multiples[0] == 1)
64  {
65  for (uint32_t idx = 0; idx < inputInfo.GetNumElements(); ++idx)
66  {
67  float inputValue = inputDecoder.Get();
68  ++inputDecoder;
69  outputEncoder.Set(inputValue);
70  ++outputEncoder;
71  }
72  return;
73  }
74 
75  std::vector<float> inputData = inputDecoder.DecodeTensor(inputInfo.GetShape());
76  std::vector<float> outputData;
77  auto outputNumElements = inputData.size() * static_cast<uint32_t>(std::accumulate(begin(params.m_Multiples),
78  end(params.m_Multiples),
79  1,
80  std::multiplies<>()));
81  outputData.reserve(outputNumElements);
82 
83  for (uint32_t outputIndex = 0; outputIndex < outputNumElements; ++outputIndex)
84  {
85  std::vector<uint32_t> outputCoords = IndexToCoordinates(outputShape, outputIndex);
86 
87  // Converting output coordinates to input coordinates using modulus
88  std::vector<uint32_t> inputCoordinates;
89  inputCoordinates.reserve(rank);
90  for (uint32_t i = 0; i < rank; ++i)
91  {
92  inputCoordinates.push_back(outputCoords[i] % inputShape[i]);
93  }
94 
95  uint32_t inputIndex = CoordinatesToIndex(inputShape, inputCoordinates);
96 
97  outputEncoder[outputIndex];
98  outputEncoder.Set(inputData[inputIndex]);
99  }
100 }

References CoordinatesToIndex(), Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IndexToCoordinates(), TileDescriptor::m_Multiples, and Encoder< IType >::Set().
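
Each output coordinate is wrapped back onto the input by taking it modulo the input extent in that dimension, as the inner loop above shows. A standalone sketch of that mapping (the helper is illustrative, not part of ArmNN):

    #include <cstddef>
    #include <vector>

    // Map an output coordinate back to the input coordinate it is tiled from.
    std::vector<unsigned int> TileSourceCoords(const std::vector<unsigned int>& outputCoords,
                                               const std::vector<unsigned int>& inputShape)
    {
        std::vector<unsigned int> inputCoords;
        inputCoords.reserve(inputShape.size());
        for (std::size_t i = 0; i < inputShape.size(); ++i)
        {
            inputCoords.push_back(outputCoords[i] % inputShape[i]);
        }
        return inputCoords;
    }

    // Example: with an input shape of {2, 3} and multiples {2, 1}, output coordinate {3, 1}
    // reads from input coordinate {1, 1}.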

◆ TopKSort()

void TopKSort ( unsigned int  k,
unsigned int *  indices,
const float *  values,
unsigned int  numElement 
)

Definition at line 23 of file DetectionPostProcess.cpp.

24 {
25  std::partial_sort(indices, indices + k, indices + numElement,
26  [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
27 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().
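
A short usage sketch (assuming the declaring header is available); only the first k positions of the index array are ordered:

    void Example()
    {
        float scores[]         = { 0.1f, 0.9f, 0.4f, 0.7f };
        unsigned int indices[] = { 0, 1, 2, 3 };

        // Order the first two indices by descending score.
        armnn::TopKSort(2, indices, scores, 4);

        // indices[0] == 1 and indices[1] == 3; the order of the remaining entries is unspecified.
    }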

◆ TosaRefBackendId()

constexpr const char* armnn::TosaRefBackendId ( )
constexpr

Definition at line 10 of file TosaRefBackendId.hpp.

10 { return "TosaRef"; }

Referenced by TosaRefBackend::GetIdStatic().

◆ TosaRefPreCompiledWorkloadValidate()

bool TosaRefPreCompiledWorkloadValidate ( std::string *  )

Definition at line 166 of file TosaRefPreCompiledWorkload.cpp.

167 {
168  return true;
169 }

◆ TosaRefTensorHandleFactoryId()

constexpr const char* armnn::TosaRefTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file TosaRefTensorHandleFactory.hpp.

15 { return "Arm/TosaRef/TensorHandleFactory"; }

Referenced by TosaRefTensorHandleFactory::GetIdStatic().

◆ TransposeConvolution2dImpl()

void TransposeConvolution2dImpl ( const TransposeConvolution2dDescriptor descriptor,
const TensorShape inputShape,
Decoder< float > &  inputDecoder,
const TensorShape outputShape,
Encoder< float > &  outputEncoder,
const TensorShape weightsShape,
Decoder< float > &  weightsDecoder,
Decoder< float > *  biasesDecoder 
)

Definition at line 15 of file TransposeConvolution2d.cpp.

23 {
24  if (descriptor.m_BiasEnabled && !biasesDecoder)
25  {
26  throw InvalidArgumentException("Biases enabled but no bias data provided");
27  }
28  const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
29  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
30  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
31  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
32 
33  const unsigned int numBatches = inputShape[0];
34 
35  const unsigned int inputWidth = inputShape[widthIndex];
36  const unsigned int inputHeight = inputShape[heightIndex];
37  const unsigned int inputDepth = inputShape[channelsIndex];
38 
39  const unsigned int weightsHeight = weightsShape[heightIndex];
40  const unsigned int weightsWidth = weightsShape[widthIndex];
41  const unsigned int weightsDepth = weightsShape[channelsIndex];
42 
43  const unsigned int outputHeight = outputShape[heightIndex];
44  const unsigned int outputWidth = outputShape[widthIndex];
45  const unsigned int outputDepth = outputShape[channelsIndex];
46 
47  const unsigned int paddingLeft = descriptor.m_PadLeft;
48  const unsigned int paddingTop = descriptor.m_PadTop;
49 
50  const unsigned int strideX = descriptor.m_StrideX;
51  const unsigned int strideY = descriptor.m_StrideY;
52 
53  std::vector<float> outputBuffer(outputShape.GetNumElements(), 0);
54 
55  const std::vector<float> inputVec = inputDecoder.DecodeTensor(inputShape);
56  const std::vector<float> filterVec = weightsDecoder.DecodeTensor(weightsShape);
57 
58  for (unsigned int batch = 0u; batch < numBatches; ++batch)
59  {
60  for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
61  {
62  for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
63  {
64  unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
65  unsigned int yOutputOrigin = yInput * strideY - paddingTop;
66 
67  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
68  {
69  for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
70  {
71  for (unsigned int xWeights = 0u; xWeights < weightsWidth; ++xWeights)
72  {
73  unsigned int yOutput = yOutputOrigin + yWeights;
74  unsigned int xOutput = xOutputOrigin + xWeights;
75 
76  if (yOutput < outputHeight && xOutput< outputWidth)
77  {
78  for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
79  {
80  unsigned int inputIndex;
81  unsigned int outputIndex;
82  unsigned int weightsIndex;
83 
84  if(descriptor.m_DataLayout == armnn::DataLayout::NHWC)
85  {
86  inputIndex = batch * inputHeight * inputWidth * inputDepth +
87  yInput * inputWidth * inputDepth +
88  xInput * inputDepth +
89  dInput;
90 
91  weightsIndex = dOutput * weightsHeight * weightsWidth * weightsDepth +
92  yWeights * weightsWidth * weightsDepth +
93  xWeights * weightsDepth +
94  dInput;
95 
96  outputIndex = batch * outputHeight * outputWidth * outputDepth +
97  yOutput * outputWidth * outputDepth +
98  xOutput * outputDepth +
99  dOutput;
100  }
101  else
102  {
103  inputIndex = batch * inputDepth * inputHeight * inputWidth +
104  dInput * inputHeight * inputWidth +
105  yInput * inputWidth +
106  xInput;
107 
108  weightsIndex = dOutput * weightsDepth * weightsHeight * weightsWidth +
109  dInput * weightsHeight * weightsWidth +
110  yWeights * weightsWidth +
111  xWeights;
112 
113  outputIndex = batch * outputDepth * outputHeight * outputWidth +
114  dOutput * outputHeight * outputWidth +
115  yOutput * outputWidth +
116  xOutput;
117  }
118 
119  outputBuffer[outputIndex] += inputVec[inputIndex] * filterVec[weightsIndex];
120  }
121  }
122  }
123  }
124 
125  }
126  }
127  }
128  }
129 
130  // Apply bias (if enabled)
131  if (descriptor.m_BiasEnabled)
132  {
133  outputEncoder[0];
134  Decoder<float>& rBiasesDecoder = *biasesDecoder;
135 
136  for (unsigned int batch = 0u; batch < numBatches; ++batch)
137  {
138  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
139  {
140  rBiasesDecoder[dOutput];
141  for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
142  {
143  for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
144  {
145  const unsigned int outputIndex =
146  dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
147  outputBuffer[outputIndex] += rBiasesDecoder.Get();
148  }
149  }
150  }
151  }
152  }
153  outputEncoder[0];
154  for (float output : outputBuffer)
155  {
156  outputEncoder.Set(output);
157  ++outputEncoder;
158  }
159 }

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorShape::GetNumElements(), DataLayoutIndexed::GetWidthIndex(), TransposeConvolution2dDescriptor::m_BiasEnabled, TransposeConvolution2dDescriptor::m_DataLayout, TransposeConvolution2dDescriptor::m_PadLeft, TransposeConvolution2dDescriptor::m_PadTop, TransposeConvolution2dDescriptor::m_StrideX, TransposeConvolution2dDescriptor::m_StrideY, NHWC, and Encoder< IType >::Set().
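
For NHWC the flat-index arithmetic above follows the usual row-major [N, H, W, C] layout. A small sketch of that formula (the helper is illustrative, not part of ArmNN):

    // Flat index for an [N, H, W, C] tensor: ((batch * H + y) * W + x) * C + c.
    unsigned int NhwcIndex(unsigned int batch, unsigned int y, unsigned int x, unsigned int c,
                           unsigned int height, unsigned int width, unsigned int channels)
    {
        return ((batch * height + y) * width + x) * channels + c;
    }

    // Example: in a [1, 2, 2, 3] tensor, the element at (batch 0, y 1, x 0, c 2) has flat index 8.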

◆ TrueFunc()

bool armnn::TrueFunc ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 54 of file LayerSupportCommon.hpp.

55 {
56  IgnoreUnused(reasonIfUnsupported);
57  IgnoreUnused(params...);
58  return true;
59 }

References IgnoreUnused().

◆ ValidateSourcesMatchOptimizedNetwork()

void armnn::ValidateSourcesMatchOptimizedNetwork ( std::vector< BackendOptions optimizedOptions,
const INetworkProperties networkProperties 
)

This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization.

During optimization the tensor handle factories are chosen based on whether import and export are enabled. If the user then specifies something incompatible here it can lead to problems.

Parameters
optimizedOptions
networkProperties

Definition at line 101 of file LoadedNetwork.cpp.

103 {
104  // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
105  // added as backend options.
106  const vector<BackendOptions>::iterator& backendItr =
107  find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
108  if (backend.GetBackendId().Get() == "Global")
109  {
110  return true;
111  }
112  else
113  {
114  return false;
115  }
116  });
117  bool importEnabled = false;
118  bool exportEnabled = false;
119  if (backendItr != optimizedOptions.end())
120  {
121  // Find the importEnabled and exportEnabled values.
122  for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
123  {
124  const BackendOptions::BackendOption& option = backendItr->GetOption(i);
125  if (option.GetName() == "ImportEnabled")
126  {
127  importEnabled = option.GetValue().AsBool();
128  }
129  if (option.GetName() == "ExportEnabled")
130  {
131  exportEnabled = option.GetValue().AsBool();
132  }
133  }
134  }
135 
136  // Now that we have values for import and export compare them to the MemorySource variables.
137  // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
138  if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
139  (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
140  {
141  auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
142  if (!importEnabled)
143  {
144  message.append(" requires that memory import be enabled. However, "
145  "it was disabled when this network was optimized.");
146  }
147  else
148  {
149  message.append(" requires that memory import be disabled. However, "
150  "it was enabled when this network was optimized.");
151  }
152  throw InvalidArgumentException(message);
153  }
154 
155  if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
156  (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
157  {
158  auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
159  if (!exportEnabled)
160  {
161  message.append(" requires that memory export be enabled. However, "
162  "it was disabled when this network was optimized.");
163  }
164  else
165  {
166  message.append(" requires that memory export be disabled. However, "
167  "it was enabled when this network was optimized.");
168  }
169  throw InvalidArgumentException(message);
170  }
171 } // anonymous
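
The rule being enforced can be summarised as: a memory source other than Undefined requires the matching import (or export) flag to have been enabled when the network was optimized, and vice versa. A sketch of that check for the input side (the helper is illustrative, not part of ArmNN):

    #include <armnn/Types.hpp>

    // Consistent when "wants import" (source != Undefined) matches the flag recorded at optimization time.
    bool InputSourceIsConsistent(armnn::MemorySource inputSource, bool importEnabled)
    {
        const bool wantsImport = (inputSource != armnn::MemorySource::Undefined);
        return wantsImport == importEnabled;
    }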

◆ VerifyClContextBuffer()

bool armnn::VerifyClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 157 of file ClContextSchema_generated.h.

158  {
159  return verifier.VerifyBuffer<armnn::ClContext>(ClContextIdentifier());
160 }

References ClContextIdentifier().

◆ VerifySizePrefixedClContextBuffer()

bool armnn::VerifySizePrefixedClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 162 of file ClContextSchema_generated.h.

163  {
164  return verifier.VerifySizePrefixedBuffer<armnn::ClContext>(ClContextIdentifier());
165 }

References ClContextIdentifier().

◆ VerifyTensorInfoDataType()

void armnn::VerifyTensorInfoDataType ( const armnn::TensorInfo info,
armnn::DataType  dataType 
)
inline

Definition at line 382 of file TypesUtils.hpp.

383 {
384  if (info.GetDataType() != dataType)
385  {
386  std::stringstream ss;
387  ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
388  << " for tensor:" << info.GetShape()
389  << ". The type expected to be: " << armnn::GetDataTypeName(dataType);
390  throw armnn::Exception(ss.str());
391  }
392 }

References GetDataTypeName(), and info.
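
A usage sketch, assuming the usual public headers and the TensorInfo shape-and-type constructor:

    #include <armnn/Tensor.hpp>
    #include <armnn/TypesUtils.hpp>

    void Example()
    {
        armnn::TensorInfo info({ 1, 3, 224, 224 }, armnn::DataType::Float32);

        armnn::VerifyTensorInfoDataType(info, armnn::DataType::Float32);    // passes
        // armnn::VerifyTensorInfoDataType(info, armnn::DataType::QAsymmU8) would throw armnn::Exception.
    }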

◆ WrapClError()

RuntimeException WrapClError ( const cl::Error &  clError,
const CheckLocation location 
)
inline

Definition at line 159 of file ClWorkloadUtils.hpp.

160 {
161  std::stringstream message;
162  message << "CL error: " << clError.what() << ". Error code: " << clError.err();
163 
164  return RuntimeException(message.str(), location);
165 }

References Exception::what().

Referenced by RunClFunction().

Variable Documentation

◆ cpuAccCapabilities

const BackendCapabilities cpuAccCapabilities("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })

◆ cpuRefCapabilities

const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })

◆ EXPIRE_RATE

constexpr unsigned int EXPIRE_RATE = 3U
constexpr

Variable to control expire rate of priority queue.

Definition at line 37 of file Types.hpp.

◆ g_AggregateProfilingEventsByInference

constexpr bool g_AggregateProfilingEventsByInference = true
constexpr

Definition at line 37 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults().

◆ g_ProfilingEventCountHint

constexpr std::size_t g_ProfilingEventCountHint = 1024
constexpr

Definition at line 29 of file Profiling.cpp.

Referenced by ProfilerImpl::ProfilerImpl().

◆ g_WriteProfilingEventSequence

constexpr bool g_WriteProfilingEventSequence = true
constexpr

Definition at line 32 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ g_WriteReportToStdOutOnProfilerDestruction

constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
constexpr

Definition at line 41 of file Profiling.cpp.

Referenced by ProfilerImpl::~ProfilerImpl().

◆ gpuFsaCapabilities

const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })

◆ LOWEST_CAPTURE_PERIOD

constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
constexpr

The lowest performance data capture interval we support is 10 milliseconds.

Definition at line 34 of file Types.hpp.

◆ MaxNumOfTensorDimensions

constexpr unsigned int MaxNumOfTensorDimensions
constexpr

Definition at line 31 of file Types.hpp.

◆ oldCpuRefCapabilities

const std::set<armnn::BackendCapability> oldCpuRefCapabilities
Initial value:

Definition at line 25 of file RefBackend.hpp.

◆ paddingRequiredLayers

const std::set<armnn::LayerType> paddingRequiredLayers
Initial value:
{
LayerType::Convolution2d,
LayerType::DepthwiseConvolution2d,
LayerType::Lstm,
LayerType::Mean,
LayerType::QuantizedLstm,
LayerType::TransposeConvolution2d
}

Definition at line 16 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetCapabilities().

◆ tl_Profiler

thread_local IProfiler* tl_Profiler = nullptr

◆ wordSize

constexpr size_t wordSize = sizeof(size_t) * 8
constexpr

Definition at line 22 of file SingleAxisPriorityList.cpp.

ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
armnn::ArgMinMaxFunction::Max
@ Max
armnn::TuningLevel::Exhaustive
@ Exhaustive
armnn::MemorySource::Malloc
@ Malloc
armnn::CapabilityClass::FallbackImportDisabled
@ FallbackImportDisabled
armnnUtils::Transpose
void Transpose(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Transpose.cpp:153
armnn::PaddingMode::Symmetric
@ Symmetric
armnn::NeonMinimumWorkloadValidate
arm_compute::Status NeonMinimumWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
Validate function for validating the inputs and output.
Definition: NeonMinimumWorkload.cpp:15
armnn::GetBinaryOperationAsCString
constexpr char const * GetBinaryOperationAsCString(BinaryOperation operation)
Definition: TypesUtils.hpp:76
IS_MULTI_AXES_REDUCE_SUPPORTED
#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)
Macro function check if layer with multiple axes is supported on each backend.
Definition: ArmComputeUtils.hpp:373
armnn::optimizations::InsertDebugToFileLayer
OptimizeForType< Layer, AddDebugToFileImpl > InsertDebugToFileLayer
Definition: AddDebug.hpp:54
VectorVectorCwiseProduct
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:187
armnn::IOptimizedNetworkPtr
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:340
armnn::MemBlockStrategyType::MultiAxisPacking
@ MultiAxisPacking
armnn::ApplyBackendOptimizations
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1320
armnn::Compute::Undefined
@ Undefined
armnn::BinaryOperation::Mul
@ Mul
armnn::DataType::Boolean
@ Boolean
armnn::Pooling2dDescriptor::m_PaddingMethod
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
Definition: Descriptors.hpp:425
armnn::optimizations::InsertDebugLayer
OptimizeForType< Layer, AddDebugImpl > InsertDebugLayer
Definition: AddDebug.hpp:53
armnn::GenerateRangeK
std::vector< unsigned int > GenerateRangeK(unsigned int k)
Definition: DetectionPostProcess.cpp:16
armnn::LayerType::Permute
@ Permute
armnn::optimizations::FuseBatchNormIntoConvolution2DFloat32
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoConvolution2DFloat32
Definition: FuseBatchNorm.hpp:222
armnn::NormalizationAlgorithmChannel::Within
@ Within
armnn::BinaryOperation::Add
@ Add
armnn::NeonAdditionWorkloadValidate
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonAdditionWorkload.cpp:20
armnn::BackendOptions::Var::AsInt
int AsInt() const
Definition: BackendOptions.hpp:120
armnn::Convert1HWOTensorInfoToAcl
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function coverts a TensorInfo...
Definition: WorkloadUtils.cpp:177
CONSTRUCT_IN_PLACE
#define CONSTRUCT_IN_PLACE
Definition: Optional.hpp:41
armnn::ConvertActivationDescriptorToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
Definition: ArmComputeUtils.hpp:94
armnn::ComparisonOperation::LessOrEqual
@ LessOrEqual
armnn::MakeInfo
arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor &descriptor)
Definition: NeonDetectionPostProcessWorkload.cpp:17
armnn::GetLayerTypeAsCString
const char * GetLayerTypeAsCString(LayerType type)
Definition: InternalTypes.cpp:13
armnn::Encoder::Set
virtual void Set(IType right)=0
armnn::NeonMultiplicationWorkloadValidate
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonMultiplicationWorkload.cpp:19
armnn::Activation
void Activation(Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
Definition: Activation.cpp:102
armnn::DataLayout::NCDHW
@ NCDHW
armnn::SetClStridedSliceData
auto SetClStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
Definition: ClWorkloadUtils.hpp:79
armnn::IsQuantized8BitType
constexpr bool IsQuantized8BitType(DataType dataType)
Definition: TypesUtils.hpp:316
armnn::Compute::GpuAcc
@ GpuAcc
GPU Execution: OpenCL: ArmCompute.
armnn::ActivationFunction::LeakyReLu
@ LeakyReLu
MeanStddevNormalization
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
Definition: LstmUtils.cpp:40
VectorBatchVectorCwiseProductAccumulate
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:131
armnn::MemorySource::Gralloc
@ Gralloc
armnn::DataLayout
DataLayout
Definition: Types.hpp:62
armnn::InsertConvertFp16ToFp32LayersBefore
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
Definition: NetworkUtils.cpp:40
armnn::Pooling3dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:479
armnn::SpaceToBatchNdDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:1071
armnn::Splitter
void Splitter(const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
Definition: Splitter.hpp:17
armnn::Append
void Append(Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
Definition: Optimizer.hpp:36
armnn::CollapseLeadingUnitDimensions
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
Definition: NeonBackendOptimizationUtils.hpp:14
armnn::DepthwiseConvolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:708
armnn::Pooling2dDescriptor::m_PoolHeight
uint32_t m_PoolHeight
Pooling height value.
Definition: Descriptors.hpp:417
armnn::QosExecPriority::Medium
@ Medium
armnn::optimizations::OptimizeInversePermutes
OptimizeForConnection< PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl< PermuteLayer > > OptimizeInversePermutes
Definition: OptimizeInversePermutes.hpp:43
armnn::DataLayout::NHWC
@ NHWC
armnn::ActivationFunction::SoftReLu
@ SoftReLu
armnn::ConvertResizeMethodToAclInterpolationPolicy
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
Definition: ArmComputeUtils.hpp:213
armnn::optimizations::TransposeAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< TransposeLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< TransposeLayer > > TransposeAndBatchToSpaceAsDepthToSpace
Definition: PermuteAndBatchToSpaceAsDepthToSpace.hpp:104
armnn::Convolution3dDescriptor::m_PadFront
uint32_t m_PadFront
Padding front value in the depth dimension.
Definition: Descriptors.hpp:637
VectorBatchVectorAdd
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:16
armnn::EdgeStrategy::DirectCompatibility
@ DirectCompatibility
No strategy has been defined. Used internally to verify integrity of optimizations.
armnn::TuningLevel::None
@ None
armnn::Pooling2d
void Pooling2d(Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
Computes the Pooling2d operation.
Definition: Pooling2d.cpp:142
armnn::LogSeverity::Trace
@ Trace
armnn::BackendCapability::NonConstWeights
@ NonConstWeights
Constant weights can be accessed through the descriptors, On the other hand, non-const weights can be...
armnn::optimizations::FoldPadIntoPooling2d
OptimizeForExclusiveConnection< PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl > FoldPadIntoPooling2d
Definition: FoldPadIntoLayer2d.hpp:283
armnn::Compute::CpuRef
@ CpuRef
CPU Execution: Reference C++ kernels.
armnn::NeonSubtractionWorkloadValidate
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonSubtractionWorkload.cpp:22
armnn::optimizations::Fp32NetworkToFp16Converter
OptimizeForType< Layer, ConvertFp32NetworkToFp16Impl > Fp32NetworkToFp16Converter
Definition: ConvertFp32NetworkToFp16.hpp:87
armnnUtils::GetUnsignedAxis
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
Definition: TensorUtils.cpp:236
armnn::ViewsDescriptor::HasAxis
bool HasAxis() const
Returns true if an axis has been set.
Definition: Descriptors.cpp:388
armnn::GetStatusAsCString
constexpr char const * GetStatusAsCString(Status status)
Definition: TypesUtils.hpp:21
armnn::ConvertAdditionalInfoToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
Definition: ArmComputeUtils.hpp:105
armnn::IsLayerOptimizable
bool IsLayerOptimizable(const armnn::Layer &layer)
Definition: MockBackend.cpp:99
armnn::TensorShape::GetDimensionSpecificity
bool GetDimensionSpecificity(unsigned int i) const
Gets information about if the dimension size has been specified or not.
Definition: Tensor.cpp:211
ARMNN_SCOPED_PROFILING_EVENT_CL
#define ARMNN_SCOPED_PROFILING_EVENT_CL(name)
Definition: ClWorkloadUtils.hpp:21
armnn::DepthwiseConvolution2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:710
armnn::ActivationFunction::Sqrt
@ Sqrt
armnn::GetTimeNow
std::chrono::high_resolution_clock::time_point GetTimeNow()
Definition: Timer.hpp:14
armnn::JsonObjectType::Measurement
@ Measurement
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::CreateProgram
flatbuffers::Offset< Program > CreateProgram(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
Definition: ClContextSchema_generated.h:118
armnn::optimizations::FoldPadIntoConvolution2d
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Definition: FoldPadIntoLayer2d.hpp:277
armnn::NeonMaximumWorkloadValidate
arm_compute::Status NeonMaximumWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
Definition: NeonMaximumWorkload.cpp:14
armnn::AllocateOutputData
void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
Definition: DetectionPostProcess.cpp:101
armnn::MemorySource::DmaBufProtected
@ DmaBufProtected
armnn::NormalizationAlgorithmMethod::LocalBrightness
@ LocalBrightness
Krichevsky 2012: Local Brightness Normalization.
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, LayerType type)
Definition: SubgraphUtils.hpp:362
armnn::BinaryOperation::Sub
@ Sub
armnn::PermuteTensor
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
Definition: WorkloadUtils.cpp:19
armnn::GetDataTypeName
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:233
armnn::SetNeonStridedSliceData
auto SetNeonStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
Definition: NeonWorkloadUtils.hpp:138
armnn::optimizations::ConvertConstDequantisationLayersToConstLayers
OptimizeForConnection< ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl > ConvertConstDequantisationLayersToConstLayers
Definition: ConvertConstDequantisationLayersToConstLayers.hpp:173
armnn::OutputShapeRounding::Floor
@ Floor
armnn::Pooling2dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:421
armnn::TensorInfo::GetNumDimensions
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
armnn::BatchToSpaceNdDescriptor::m_BlockShape
std::vector< unsigned int > m_BlockShape
Block shape values.
Definition: Descriptors.hpp:898
armnn::Reduce
void Reduce(const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
Definition: Reduce.cpp:70
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::SetLogFilter
void SetLogFilter(LogSeverity level)
Definition: Logging.cpp:73
armnnUtils::DataLayoutIndexed
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
Definition: DataLayoutIndexed.hpp:17
armnn::optimizations::MoveTransposeUp
OptimizeForConnection< Layer, TransposeLayer, MoveTransposeUpImpl > MoveTransposeUp
Definition: MoveTransposeUp.hpp:83
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
armnn::GraphEvent::LayerAdded
@ LayerAdded
armnn::DataType::Float32
@ Float32
armnn::GetDataLayoutName
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:253
armnnUtils::DataLayoutIndexed::GetDataLayout
armnn::DataLayout GetDataLayout() const
Definition: DataLayoutIndexed.hpp:22
armnn::ActivationFunction::TanH
@ TanH
VectorBatchVectorCwiseProduct
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:152
ClipVector
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:229
armnn::LogSoftmax
void LogSoftmax(Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
Definition: LogSoftmax.cpp:27
armnn::GpuFsaBackendId
constexpr const char * GpuFsaBackendId()
Definition: GpuFsaBackendId.hpp:10
armnn::LogicalBinaryOperation::LogicalOr
@ LogicalOr
armnn::optimizations::BroadcastToOptimizationLayer
OptimizeForType< BroadcastToLayer, DeleteBroadcastToImpl > BroadcastToOptimizationLayer
Definition: DeleteBroadcastTo.hpp:38
armnn::Pooling2dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:411
armnn::Convolution3dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:633
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::PoolingAlgorithm::L2
@ L2
armnn::SpaceToBatchNdDescriptor::m_BlockShape
std::vector< unsigned int > m_BlockShape
Block shape values.
Definition: Descriptors.hpp:1066
MatrixBatchVectorMultiplyAccumulate
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:87
armnn::Convolution3dDescriptor::m_DilationX
uint32_t m_DilationX
Dilation along x axis.
Definition: Descriptors.hpp:647
armnn::PaddingMode
PaddingMode
The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).
Definition: Types.hpp:200
armnn::Convolution3dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:635
armnn::MaxNumOfTensorDimensions
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
armnn::DataType::QAsymmU8
@ QAsymmU8
armnn::QosExecPriority::High
@ High
armnn::LogSeverity::Info
@ Info
armnn::ActivationFunction::BoundedReLu
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
armnn::minimum
Definition: Minimum.hpp:12
armnn::optimizations::PermuteAsReshape
OptimizeForType< PermuteLayer, PermuteAsReshapeImpl > PermuteAsReshape
Definition: PermuteAsReshape.hpp:66
armnn::DataType::QSymmS8
@ QSymmS8
armnn::ReportWarning
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)
Definition: Network.cpp:774
armnnUtils::Permute
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:164
armnn::Half
half_float::half Half
Definition: Half.hpp:22
armnn::ComputeConv3DInfo
arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.
Definition: ArmComputeUtils.hpp:261
armnn::Pooling3dDescriptor::m_StrideZ
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
Definition: Descriptors.hpp:497
armnn::CreateClContext
flatbuffers::Offset< ClContext > CreateClContext(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
Definition: ClContextSchema_generated.h:57
armnn::FoldPadLayer
LayerType * FoldPadLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, PadLayer *padLayer)
Definition: SubgraphUtils.hpp:234
armnnUtils::Permuted
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:125
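A sketch of permuting an NCHW shape to NHWC; it assumes the convention that mappings[i] gives the destination index of source dimension i:
  armnn::TensorShape nchw({1, 3, 224, 224});
  armnn::PermutationVector toNHWC({0, 3, 1, 2});                    // N->0, C->3, H->1, W->2
  armnn::TensorShape nhwc = armnnUtils::Permuted(nchw, toNHWC);     // expected {1, 224, 224, 3}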
armnn::CopyArmComputeClTensorData
void CopyArmComputeClTensorData(arm_compute::CLTensor &dstTensor, const T *srcData)
Definition: ClWorkloadUtils.hpp:64
armnn::Pooling2dDescriptor::m_PoolWidth
uint32_t m_PoolWidth
Pooling width value.
Definition: Descriptors.hpp:415
armnn::Stack
void Stack(const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
Definition: Stack.cpp:12
armnn::UnaryOperation::Neg
@ Neg
armnn::optimizations::PermuteAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< PermuteLayer > > PermuteAndBatchToSpaceAsDepthToSpace
Definition: PermuteAndBatchToSpaceAsDepthToSpace.hpp:102
armnn::BatchToSpaceNd
void BatchToSpaceNd(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const BatchToSpaceNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: BatchToSpaceNd.cpp:50
armnn::Pooling3dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCDHW, NDHWC).
Definition: Descriptors.hpp:503
armnn::GetActivationFunctionAsCString
constexpr char const * GetActivationFunctionAsCString(ActivationFunction activation)
Definition: TypesUtils.hpp:31
armnn::BatchToSpaceNdDescriptor::m_Crops
std::vector< std::pair< unsigned int, unsigned int > > m_Crops
The values to crop from the input dimension.
Definition: Descriptors.hpp:900
CopyVector
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:244
armnn::optimizations::PermuteDepthwiseConv2dWeights
OptimizeForType< Layer, PermuteDepthwiseConv2dWeightsImpl > PermuteDepthwiseConv2dWeights
Definition: PermuteDepthwiseConv2dWeights.hpp:78
armnn::SelectTensorHandleStrategy
OptimizationResult SelectTensorHandleStrategy(Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1821
armnn::IsNCHW
bool IsNCHW(armnn::Layer &layer)
Definition: SubgraphUtils.hpp:213
armnn::BoostLogSeverityMapping::error
@ error
armnn::Pooling3dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:481
armnn::Coordinates
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
Definition: InternalTypes.hpp:15
Sub1Vector
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
Definition: LstmUtils.cpp:173
armnn::TensorInfo::IsConstant
bool IsConstant() const
Definition: Tensor.cpp:513
armnn::ReduceOperation::Mean
@ Mean
armnn::ActivationFunction::HardSwish
@ HardSwish
armnn::DataType::QSymmS16
@ QSymmS16
armnn::ActivationFunction::Gelu
@ Gelu
armnn::numeric_cast
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
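A sketch of the checked narrowing cast; out-of-range values are assumed to be rejected via ARMNN_NUMERIC_CAST_CHECK when checks are enabled, rather than silently truncated:
  size_t numElements = 1024;                                        // illustrative value
  unsigned int n = armnn::numeric_cast<unsigned int>(numElements);  // checked size_t -> unsigned int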
armnn::LayerType::ElementwiseBinary
@ ElementwiseBinary
armnn::DataType::BFloat16
@ BFloat16
armnn::optimizations::MovePermuteUp
OptimizeForConnection< Layer, PermuteLayer, MovePermuteUpImpl > MovePermuteUp
Definition: MovePermuteUp.hpp:83
armnn::FullyConnected
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.
Definition: FullyConnected.cpp:13
armnn::Pooling3dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:495
armnn::optimizations::OptimizeInverseConversionsFp16
OptimizeForConnection< ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp16
Definition: OptimizeInverseConversions.hpp:42
armnn::Pooling3d
void Pooling3d(Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
Computes the Pooling3d operation.
Definition: Pooling3d.cpp:172
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::NormalizationAlgorithmChannel::Across
@ Across
armnn::ClContextIdentifier
const char * ClContextIdentifier()
Definition: ClContextSchema_generated.h:148
armnn::GetOffset
unsigned int GetOffset(const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
Definition: SpaceToBatchNd.cpp:15
armnn::Combine
MemorySourceFlags Combine(Arg source, Args... rest)
Definition: MemorySources.hpp:36
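A sketch of building a MemorySourceFlags bitmask from several MemorySource values:
  armnn::MemorySourceFlags flags = armnn::Combine(armnn::MemorySource::Malloc, armnn::MemorySource::DmaBuf);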
armnnUtils::DataLayoutIndexed::GetHeightIndex
unsigned int GetHeightIndex() const
Definition: DataLayoutIndexed.hpp:24
armnn::ComparisonOperation::NotEqual
@ NotEqual
armnn::Compute
Compute
The Compute enum is deprecated and is being replaced by BackendId.
Definition: BackendId.hpp:21
armnn::ComparisonOperation::GreaterOrEqual
@ GreaterOrEqual
armnn::FusedKernelType::AddMulAdd
@ AddMulAdd
armnnUtils::GetPerAxisParams
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
Definition: TensorUtils.cpp:280
armnn::LogSeverity::Error
@ Error
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
armnn::QosExecPriority::Low
@ Low
armnn::OptimizationResult::IsOk
bool IsOk() const
Definition: Network.hpp:276
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
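A sketch of a typical logging setup; the severity tokens (info, warning, ...) are assumed to follow the BoostLogSeverityMapping entries listed on this page:
  armnn::SetAllLoggingSinks(true, false, false);      // standard out only, no debug sink, no colour
  armnn::SetLogFilter(armnn::LogSeverity::Info);
  ARMNN_LOG(info) << "Optimisation finished";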
armnn::DataLayout::NDHWC
@ NDHWC
armnn::EdgeStrategy::CopyToTarget
@ CopyToTarget
Copy contents from the source backend tensor to the destination backend tensor.
armnn::Fill
void Fill(Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
Creates a tensor and fills it with a scalar value.
Definition: Fill.cpp:13
armnn::ViewsDescriptor::GetViewSizes
const uint32_t * GetViewSizes(uint32_t idx) const
Get the view sizes at index idx.
Definition: Descriptors.cpp:347
armnn::AllTypesAreEqualImpl
bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest)
Definition: LayerSupportRules.hpp:64
armnn::Pooling3dDescriptor::m_PoolType
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
Definition: Descriptors.hpp:473
CreateResizeAttributes
arm_compute::experimental::dynamic_fusion::ResizeAttributes CreateResizeAttributes(const armnn::ResizeDescriptor &descriptor)
Utility function used to setup an arm_compute::ResizeAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:64
armnn::CreateSupportedBackends
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
Definition: Network.cpp:1302
armnn::ClReduceWorkloadValidate
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
Definition: ClReduceWorkload.cpp:18
armnn::SpaceToBatchNdDescriptor::m_PadList
std::vector< std::pair< unsigned int, unsigned int > > m_PadList
Specifies the padding values for the input dimension: heightPad{top, bottom} widthPad{left, right}.
Definition: Descriptors.hpp:1069
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::ReverseV2
void ReverseV2(const TensorInfo &inputInfo, const TensorInfo &axisInfo, Decoder< float > &inputDecoder, Decoder< int > &axisDecoder, Encoder< float > &outputEncoder)
Definition: ReverseV2Impl.cpp:78
armnn::Convolution3dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:631
armnn::ConvertReductionOperationToAcl
arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor &descriptor)
Definition: ArmComputeUtils.hpp:306
armnn::NeonReduceWorkloadValidate
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
Definition: NeonReduceWorkload.cpp:19
armnn::IntersectionOverUnion
float IntersectionOverUnion(const float *boxI, const float *boxJ)
Definition: DetectionPostProcess.cpp:29
armnn::BackendOptions::Var::IsInt
bool IsInt() const
Definition: BackendOptions.hpp:113
armnn::BinaryOperation::Maximum
@ Maximum
armnn::BoostLogSeverityMapping::trace
@ trace
CreatePool2dAttributes
arm_compute::experimental::dynamic_fusion::Pool2dAttributes CreatePool2dAttributes(const Pooling2dDescriptor &descriptor)
Utility function used to setup an arm_compute::Pool2dAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:45
armnn::DataType::Float16
@ Float16
armnn::optimizations::ConvertConstantsFloatToHalf
ConvertConstants< Float32ToFloat16, IsFloat16Layer > ConvertConstantsFloatToHalf
Definition: ConvertConstants.hpp:99
ARMNN_POLYMORPHIC_CAST_CHECK
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
Definition: PolymorphicDowncast.hpp:27
armnn::AttemptBackendAssignment
OptimizationResult AttemptBackendAssignment(BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:844
armnn::Pooling3dDescriptor::m_PoolWidth
uint32_t m_PoolWidth
Pooling width value.
Definition: Descriptors.hpp:487
armnn::BinaryOperation::SqDiff
@ SqDiff
armnn::Pooling2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:427
armnn::QueueDescriptorWithParameters::m_Parameters
LayerDescriptor m_Parameters
Definition: WorkloadData.hpp:66
armnn::UnaryOperation::Rsqrt
@ Rsqrt
armnn::NeonDivisionWorkloadValidate
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonDivisionWorkload.cpp:18
armnn::TensorShape::GetNumDimensions
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
armnn::DepthwiseConvolution2dDescriptor::m_DilationY
uint32_t m_DilationY
Dilation factor value for height dimension.
Definition: Descriptors.hpp:706
armnn::ComparisonOperation::Less
@ Less
CreateConv2dAttributes
Conv2dAttributes CreateConv2dAttributes(const Convolution2dDescriptor &descriptor)
Utility function used to setup an arm_compute::Conv2dAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:14
armnn::UnaryOperation::LogicalNot
@ LogicalNot
armnn::Pooling2dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:413
armnn::ForEachLayerInput
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
Definition: SubgraphViewSelector.cpp:277
armnn::Slice
void Slice(const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: Slice.cpp:14
armnn::ViewsDescriptor::GetAxis
int32_t GetAxis() const
Get the axis value.
Definition: Descriptors.cpp:382
armnn::Pooling3dDescriptor::m_PaddingMethod
PaddingMethod m_PaddingMethod
The padding method to be used (Exclude, IgnoreValue).
Definition: Descriptors.hpp:501
armnn::Pooling2dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:409
CreateDWConv2dAttributes
arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes CreateDWConv2dAttributes(const DepthwiseConvolution2dDescriptor &descriptor, const unsigned int aclDepthMultiplier)
Utility function used to setup an arm_compute::DepthwiseConv2dAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:29
armnn::FactoryId
ITensorHandleFactory::FactoryId FactoryId
Definition: MockTensorHandleFactory.cpp:12
ARMNN_THROW_INVALIDARG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
Definition: Exceptions.hpp:212
armnn::InsertConvertFp32ToFp16LayersAfter
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter(Graph &graph, Layer &layer)
Definition: NetworkUtils.cpp:79
armnn::ReverseRelocateIdx
unsigned int ReverseRelocateIdx(unsigned int idx, unsigned int inputRank, std::vector< bool > &axisFlag, std::vector< unsigned int > &dimSize, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:49
armnn::UnaryOperation::Exp
@ Exp
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
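A sketch of how a workload typically scopes a profiling event; the event name string is illustrative:
  void Execute()
  {
      ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuRef, "MyWorkload_Execute");
      // ... work done here is profiled until the end of this scope ...
  }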
armnn::BackendOptions::Var::IsString
bool IsString() const
Definition: BackendOptions.hpp:116
armnn::ArgMinMax
void ArgMinMax(Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
Definition: ArgMinMax.cpp:17
armnn::Convolution3dDescriptor::m_DilationZ
uint32_t m_DilationZ
Dilation along z axis.
Definition: Descriptors.hpp:651
armnn::MemorySource::DmaBuf
@ DmaBuf
armnn::PaddingMethod::Exclude
@ Exclude
The padding fields don't count and are ignored.
armnn::EmptyOptional
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional.
Definition: Optional.hpp:32
armnn::CalculateGatherNdKeyIndices
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
Definition: WorkloadUtils.cpp:313
armnn::ReverseGetMultIdx
std::vector< unsigned int > ReverseGetMultIdx(const unsigned int idx, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:16
armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition: FloatingPointConverter.cpp:43
armnn::DataType
DataType
Definition: Types.hpp:48
armnn::ReportError
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)
Definition: Network.cpp:762
armnn::LayerType::Softmax
@ Softmax
armnn::CheckFp16Support
bool CheckFp16Support(BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:1026
armnn::PolymorphicDowncast
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for built-in pointers only.
Definition: PolymorphicDowncast.hpp:74
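A sketch of downcasting a layer pointer whose concrete type has already been established; 'layer' is assumed to be an armnn::Layer* obtained elsewhere:
  if (layer->GetType() == armnn::LayerType::Activation)
  {
      auto* activation = armnn::PolymorphicDowncast<armnn::ActivationLayer*>(layer);
      const armnn::ActivationDescriptor& desc = activation->GetParameters();
  }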
armnn::Dimensionality::Scalar
@ Scalar
armnn::ActivationFunction::Elu
@ Elu
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, BinaryOperation type)
Definition: SubgraphUtils.hpp:367
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::PaddingMethod::IgnoreValue
@ IgnoreValue
The padding fields count, but are ignored.
armnn::IndexToCoordinates
std::vector< uint32_t > IndexToCoordinates(std::vector< uint32_t > &shape, uint32_t index)
Definition: Tile.cpp:16
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::UnaryOperation::Sin
@ Sin
armnn::optimizations::FusePermuteIntoConstLayer
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
Definition: ConvertConstPermuteLayersToConstLayers.hpp:124
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:309
armnn::ConvertLstmActivationFuncToAclLayerInfo
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Definition: ArmComputeUtils.hpp:118
armnn::Dequantize
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an 8-bit data type into a floating point data type.
Definition: TypesUtils.cpp:48
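A worked sketch, assuming the usual affine mapping real = scale * (quantized - offset):
  float real = armnn::Dequantize(static_cast<uint8_t>(200), 0.1f, 128);   // (200 - 128) * 0.1 = 7.2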
armnn::ActivationFunction::Linear
@ Linear
armnn::Convolution3dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:629
armnn::SetAllLoggingSinks
void SetAllLoggingSinks(bool standardOut, bool debugOut, bool coloured)
Definition: Logging.cpp:191
armnn::MakeOptimizations
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
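A sketch of how the optimization passes listed on this page are typically bundled and run; 'graph' is assumed to be an armnn::Graph owned by the network being optimized:
  armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(
      armnn::optimizations::SquashEqualPermuteSiblings(),
      armnn::optimizations::OptimizeConsecutiveReshapes()));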
armnn::BackendOptions::Var::AsBool
bool AsBool() const
Value getters.
Definition: BackendOptions.hpp:119
armnn::Decoder::DecodeTensor
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
armnn::Convolution3dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:643
armnn::ReducedOutputOffset
unsigned int ReducedOutputOffset(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
Definition: Reduce.cpp:40
armnn::LogSeverity::Fatal
@ Fatal
armnn::ReduceOperation::Sum
@ Sum
armnn::RequiresCopy
bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1454
armnn::LayerType::Addition
@ Addition
ARMNN_NUMERIC_CAST_CHECK
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25
armnn::GetDataTypeSize
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:182
armnn::Softmax
void Softmax(Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
Computes the softmax function over the input and writes the result into the output, using the shape given by the input tensor info.
Definition: Softmax.cpp:17
armnn::MemBlockStrategyType::SingleAxisPacking
@ SingleAxisPacking
armnn::ReturnWithError
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:786
armnn::SpaceToDepth
void SpaceToDepth(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: SpaceToDepth.cpp:36
armnn::ReverseGetFlatIdx
unsigned int ReverseGetFlatIdx(const std::vector< unsigned int > &idxList, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:34
armnn::Convolution3dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:641
armnn::PermutationVector
Definition: Types.hpp:314
armnn::Status::Success
@ Success
armnn::SetNeonSliceData
auto SetNeonSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
Definition: NeonWorkloadUtils.hpp:160
armnn::Dimensionality::NotSpecified
@ NotSpecified
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::GraphEvent::LayerErased
@ LayerErased
armnnUtils::DataLayoutIndexed::GetWidthIndex
unsigned int GetWidthIndex() const
Definition: DataLayoutIndexed.hpp:25
armnn::SpaceToBatchNd
void SpaceToBatchNd(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: SpaceToBatchNd.cpp:48
armnn::ConvertActivationDescriptorToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
Definition: ArmComputeUtils.hpp:87
armnn::RuntimeException
Definition: Exceptions.hpp:120
armnn::Quantize
QuantizedType Quantize(float value, float scale, int32_t offset)
Quantize a floating point data type into an 8-bit data type.
Definition: TypesUtils.cpp:30
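A worked sketch, assuming the usual affine mapping quantized = round(value / scale) + offset, saturated to the target type:
  uint8_t q = armnn::Quantize<uint8_t>(7.2f, 0.1f, 128);   // round(7.2 / 0.1) + 128 = 200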
armnn::IsLayerSupported
bool IsLayerSupported(const armnn::Layer &layer)
Definition: MockBackend.cpp:83
armnn::ResizeMethod::NearestNeighbor
@ NearestNeighbor
armnn::GetLayerInOutDatatype
std::vector< DataType > GetLayerInOutDatatype(const Layer *layer)
Definition: Network.cpp:1017
armnn::Pooling2dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:407
armnn::ActivationFunction
ActivationFunction
Definition: Types.hpp:86
armnn::BoostLogSeverityMapping::info
@ info
armnn::BinaryOperation::Power
@ Power
armnn::Pooling3dDescriptor::m_PadFront
uint32_t m_PadFront
Padding front value in the depth dimension.
Definition: Descriptors.hpp:483
armnn::BackendCapability::AsyncExecution
@ AsyncExecution
Asynchronous Execution.
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::Tile
void Tile(const TileDescriptor &params, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
Definition: Tile.cpp:45
armnn::ConvertComparisonOperationToAcl
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor &descriptor)
Definition: ArmComputeUtils.hpp:141
armnn::PoolingAlgorithm::Average
@ Average
armnn::ScatterNd
void ScatterNd(const TensorInfo &inputInfo, const TensorInfo &indicesInfo, const TensorInfo &updatesInfo, Decoder< float > &input, Decoder< int > &indices, Decoder< float > &updates, Encoder< float > &output, ScatterNdDescriptor descriptor)
Definition: ScatterNd.cpp:41
armnn::Decoder::Get
virtual IType Get() const =0
armnn::SetClSliceData
auto SetClSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
Definition: ClWorkloadUtils.hpp:100
armnn::DataType::Signed32
@ Signed32
armnn::UnaryOperation::Ceil
@ Ceil
ZeroVector
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Definition: LstmUtils.cpp:76
armnn::ShapeInferenceMethod::ValidateOnly
@ ValidateOnly
Validate all output shapes.
armnn::ShapeInferenceMethod::InferAndValidate
@ InferAndValidate
Infer missing output shapes and validate all output shapes.
armnn::ReduceOperation::Prod
@ Prod
armnn::ActivationFunction::Abs
@ Abs
armnn::BackendOptions::Var::AsString
std::string AsString() const
Definition: BackendOptions.hpp:123
armnn::DataType::QAsymmS8
@ QAsymmS8
armnn::CapabilityClass::PaddingRequired
@ PaddingRequired
armnn::Pooling3dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:477
armnn::ResizeMethod::Bilinear
@ Bilinear
ARMNN_FALLTHROUGH
#define ARMNN_FALLTHROUGH
Definition: Utils.hpp:36
armnn::ArgMinMaxFunction::Min
@ Min
armnn::Pad
void Pad(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
Definition: Pad.cpp:39
armnn::LayerType::LastLayer
@ LastLayer
armnn::roundf
float roundf(float value)
Definition: Utils.hpp:43
armnn::Pooling2dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:419
armnn::JsonObjectType::ExecObjectDesc
@ ExecObjectDesc
armnn::ReshapeWeightsForAcl
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
Definition: WorkloadUtils.cpp:48
armnn::SpaceToDepthDescriptor::m_BlockSize
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
Definition: Descriptors.hpp:1092
armnn::CoordinatesToIndex
uint32_t CoordinatesToIndex(TensorShape &shape, std::vector< uint32_t > &coordinates)
Definition: Tile.cpp:32
armnn::TuningLevel::Rapid
@ Rapid
armnn::JsonObjectType::Event
@ Event
ARMNN_VERSION
#define ARMNN_VERSION
ARMNN_VERSION: "X.Y.Z" where X = major version number, Y = minor version number, Z = patch version number.
Definition: Version.hpp:22
armnn::UnaryOperation::Log
@ Log
armnn::Debug
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
Definition: Debug.cpp:97
armnn::Pooling3dDescriptor::m_PoolHeight
uint32_t m_PoolHeight
Pooling height value.
Definition: Descriptors.hpp:489
armnn::LogicalBinaryOperation::LogicalAnd
@ LogicalAnd
armnn::optimizations::OptimizeInverseConversionsFp32
OptimizeForConnection< ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp32
Definition: OptimizeInverseConversions.hpp:44
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
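A sketch of passing a backend-specific option through ModelOptions; the option name "FastMathEnabled" is an assumed example and should be checked against the target backend's documentation:
  armnn::BackendOptions accOptions("GpuAcc", {{"FastMathEnabled", true}});
  armnn::ModelOptions modelOptions = { accOptions };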
armnn::TuningLevel::Normal
@ Normal
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Resize
void Resize(Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
Definition: Resize.cpp:65
FORWARD_WORKLOAD_VALIDATE_FUNC
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported,...)
Definition: NeonLayerSupport.cpp:166
armnn::ComparisonOperation
ComparisonOperation
Definition: Types.hpp:109
armnn::ExtractJsonObjects
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event * >> descendantsMap)
Definition: Profiling.cpp:314
armnn::Dimensionality::Specified
@ Specified
armnn::Status
Status
Definition: Types.hpp:42
armnn::Pooling3dDescriptor::m_PadBack
uint32_t m_PadBack
Padding back value in the depth dimension.
Definition: Descriptors.hpp:485
armnn::optimizations::TransposeAsReshape
OptimizeForType< TransposeLayer, TransposeAsReshapeImpl > TransposeAsReshape
Definition: TransposeAsReshape.hpp:77
armnn::ProfilingDetailsMethod::DetailsOnly
@ DetailsOnly
armnn::BackendOptions::Var::IsBool
bool IsBool() const
Type getters.
Definition: BackendOptions.hpp:112
armnn::Pooling3dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:493
armnnUtils::GetNumElementsBetween
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
Definition: TensorUtils.cpp:209
armnn::BoostLogSeverityMapping::fatal
@ fatal
armnn::Pooling3dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:475
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::Convolution3dDescriptor::m_PadBack
uint32_t m_PadBack
Padding back value in the depth dimension.
Definition: Descriptors.hpp:639
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::Offset
unsigned int Offset(const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
Definition: BatchToSpaceNd.cpp:15
armnn::Layer::ExecuteStrategy
void ExecuteStrategy(IStrategy &strategy) const override
Apply a visitor to this layer.
Definition: Layer.cpp:571
armnn::BoostLogSeverityMapping::debug
@ debug
armnn::optimizations::FuseBatchNormIntoConvolution2DFloat16
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoConvolution2DFloat16
Definition: FuseBatchNorm.hpp:227
armnn::StrEqual
constexpr bool StrEqual(const char *strA, const char(&strB)[N])
Definition: TypesUtils.hpp:201
armnn::BFloat16
Definition: BFloat16.hpp:15
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::ConvertPaddingModeToAcl
arm_compute::PaddingMode ConvertPaddingModeToAcl(const PaddingMode &paddingMode)
Definition: ArmComputeUtils.hpp:295
armnn::ViewsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:307
armnn::EdgeStrategy::ExportToTarget
@ ExportToTarget
Source backend tensor data can be exported to the destination backend tensor without a copy.
armnn::ConvertMaskToACLFormat
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
Definition: WorkloadUtils.cpp:299
armnn::AssignBackends
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1288
LIST_OF_LAYER_TYPE
#define LIST_OF_LAYER_TYPE
This list uses the X-macro technique.
Definition: Types.hpp:408
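For readers unfamiliar with the pattern, a generic illustration of the X-macro technique referred to above (illustrative only, not the actual ArmNN layer list):
  #define LIST_OF_COLOUR \
      X(Red)             \
      X(Green)           \
      X(Blue)

  #define X(name) name,
  enum class Colour { LIST_OF_COLOUR };
  #undef X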
armnn::ScatterNdFunction::Update
@ Update
armnn::LogSeverity::Warning
@ Warning
armnn::BinaryOperation::Minimum
@ Minimum
armnn::LayerType::Map
@ Map
armnn::optimizations::FuseBatchNormIntoDepthwiseConvolution2DFloat16
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoDepthwiseConvolution2DFloat16
Definition: FuseBatchNorm.hpp:237
armnn::IsQuantizedType
constexpr bool IsQuantizedType()
Definition: TypesUtils.hpp:311
armnn::Convolution3dDescriptor::m_DilationY
uint32_t m_DilationY
Dilation along y axis.
Definition: Descriptors.hpp:649
armnn::BackendsMap
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:285
armnn::Compute::CpuAcc
@ CpuAcc
CPU Execution: NEON: ArmCompute.
VectorVectorCwiseProductAccumulate
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:204
armnn::ProfilingDetailsMethod::DetailsWithEvents
@ DetailsWithEvents
armnn::ActivationFunction::ReLu
@ ReLu
armnn::LayerType::MemCopy
@ MemCopy
armnn::optimizations::SquashEqualTransposeSiblings
OptimizeForConnection< Layer, TransposeLayer, SquashEqualSiblingsImpl< TransposeLayer > > SquashEqualTransposeSiblings
Definition: SquashEqualSiblings.hpp:69
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn::IsLayerSupported
bool IsLayerSupported(const armnn::Layer *layer)
Definition: MockBackend.cpp:62
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnnUtils::DataLayoutIndexed::GetChannelsIndex
unsigned int GetChannelsIndex() const
Definition: DataLayoutIndexed.hpp:23
armnn::optimizations::ConvertConstantsHalfToFloat
ConvertConstants< Float16ToFloat32, IsFloat32Layer > ConvertConstantsHalfToFloat
Definition: ConvertConstants.hpp:98
armnn::Convolution3dDescriptor::m_StrideZ
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
Definition: Descriptors.hpp:645
armnn::DepthToSpace
void DepthToSpace(const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: DepthToSpace.cpp:16
armnn::TopKSort
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
Definition: DetectionPostProcess.cpp:23
armnn::ViewsDescriptor::GetNumViews
uint32_t GetNumViews() const
Get the number of views.
Definition: Descriptors.cpp:302
armnn::StridedSlice
void StridedSlice(const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: StridedSlice.cpp:86
armnn::optimizations::SquashEqualPermuteSiblings
OptimizeForConnection< Layer, PermuteLayer, SquashEqualSiblingsImpl< PermuteLayer > > SquashEqualPermuteSiblings
Definition: SquashEqualSiblings.hpp:67
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::ComputeReductionTensorShape
const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
Function to compute the output tensor shape based on the axes and if keepDims is set.
Definition: ArmComputeUtils.hpp:320
armnn::ActivationFunction::Square
@ Square
SetActivationParameters
void SetActivationParameters(uint32_t activation, armnn::ActivationFunction &outArmnnActivation, float &outA, float &outB)
Definition: LstmUtils.cpp:258
armnn::LayerType::Input
@ Input
armnn::NonMaxSuppression
std::vector< unsigned int > NonMaxSuppression(unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
Definition: DetectionPostProcess.cpp:48
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::optimizations::FoldPadIntoDepthwiseConvolution2d
OptimizeForExclusiveConnection< PadLayer, DepthwiseConvolution2dLayer, pad_fold::FoldPadIntoDepthwiseConvolution2dImpl > FoldPadIntoDepthwiseConvolution2d
Definition: FoldPadIntoLayer2d.hpp:281
armnn::TuningLevel
TuningLevel
Definition: ArmComputeTuningUtils.hpp:18
armnn::Activation
float Activation(float in, ActivationFunction function, float a, float b)
Definition: Activation.cpp:13
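A worked sketch of this reference helper, using the BoundedReLu semantics documented above (min(a, max(b, input))):
  float y = armnn::Activation(3.5f, armnn::ActivationFunction::BoundedReLu, 6.0f, 0.0f);   // min(6, max(0, 3.5)) = 3.5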
armnn::FuseLayer
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
Definition: ArmComputeSubgraphUtils.hpp:53
armnn::NeonElementwiseBinaryWorkloadValidate
arm_compute::Status NeonElementwiseBinaryWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: NeonElementwiseBinaryWorkload.cpp:20
armnn::CapabilityClass::CapabilityClassMax
@ CapabilityClassMax
armnnUtils::Filesystem::CreateDirectory
std::string CreateDirectory(std::string sPath)
Returns full path to temporary folder.
Definition: Filesystem.cpp:47
armnn::SetValueChecked
void SetValueChecked(Optional< T & > optionalRef, V &&val)
Definition: LayerSupportCommon.hpp:17
armnn::optimizations::MaxMinIntoBoundedRelu
OptimizeForExclusiveConnection< ElementwiseBinaryLayer, ElementwiseBinaryLayer, MaxMinIntoBoundedReluImpl > MaxMinIntoBoundedRelu
Definition: MaxMinIntoBoundedRelu.hpp:134
armnnUtils::GetTensorInfo
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:40
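A sketch of building a TensorInfo for an NHWC Float32 tensor directly from its logical dimensions, following the signature above:
  armnn::TensorInfo info = armnnUtils::GetTensorInfo(1, 16, 32, 32, armnn::DataLayout::NHWC, armnn::DataType::Float32);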
armnn::optimizations::SquashEqualReshapeSiblings
OptimizeForConnection< Layer, ReshapeLayer, SquashEqualSiblingsImpl< ReshapeLayer > > SquashEqualReshapeSiblings
Definition: SquashEqualSiblings.hpp:70
armnn::LogSeverity
LogSeverity
Definition: Utils.hpp:13
armnn::BinaryOperation::Div
@ Div
armnn::OutputShapeRounding::Ceiling
@ Ceiling
armnn::DataType::Signed64
@ Signed64
armnn::LayerType::Convolution2d
@ Convolution2d
armnn::LayerType::FirstLayer
@ FirstLayer
armnn::GetComputeDeviceAsCString
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34
armnn::Optimize
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:2145
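A sketch of the usual optimization call; the network is assumed to have been populated and finalized elsewhere:
  armnn::INetworkPtr network = armnn::INetwork::Create();                    // layers added elsewhere
  armnn::IRuntimePtr runtime = armnn::IRuntime::Create(armnn::IRuntime::CreationOptions{});
  std::vector<armnn::BackendId> preferences = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
  armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*network, preferences, runtime->GetDeviceSpec());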
armnn::BatchToSpaceNdDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:902
armnn::ComparisonOperation::Equal
@ Equal
armnn::optimizations::OptimizeInverseTransposes
OptimizeForConnection< TransposeLayer, TransposeLayer, OptimizeInversePermutesImpl< TransposeLayer > > OptimizeInverseTransposes
Definition: OptimizeInversePermutes.hpp:45
armnnUtils::DataLayoutIndexed::GetIndex
unsigned int GetIndex(const armnn::TensorShape &shape, unsigned int batchIndex, unsigned int channelIndex, unsigned int heightIndex, unsigned int widthIndex) const
Definition: DataLayoutIndexed.hpp:28
armnn::DepthwiseConvolution2dDescriptor::m_DilationX
uint32_t m_DilationX
Dilation factor value for width dimension.
Definition: Descriptors.hpp:704
armnn::NormalizationAlgorithmMethod::LocalContrast
@ LocalContrast
Jarret 2009: Local Contrast Normalization.
armnn::optimizations::AddBroadcastReshapeLayer
OptimizeForType< Layer, AddBroadcastReshapeLayerImpl > AddBroadcastReshapeLayer
Definition: AddBroadcastReshapeLayer.hpp:94
armnn::Pooling3dDescriptor::m_PoolDepth
uint32_t m_PoolDepth
Pooling depth value.
Definition: Descriptors.hpp:491
armnn::NullPointerException
Definition: Exceptions.hpp:146
armnn::swap
void swap(ViewsDescriptor &first, ViewsDescriptor &second)
Definition: Descriptors.cpp:366
armnnUtils::GetTensorShape
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:21
armnn::TensorShape::GetDimensionality
Dimensionality GetDimensionality() const
Function that returns the tensor dimensionality.
Definition: Tensor.hpp:92
armnn::optimizations::FuseBatchNormIntoDepthwiseConvolution2DFloat32
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoDepthwiseConvolution2DFloat32
Definition: FuseBatchNorm.hpp:232
armnn::PaddingMode::Reflect
@ Reflect
armnn::CopyArmComputeTensorData
void CopyArmComputeTensorData(arm_compute::Tensor &dstTensor, const T *srcData)
Definition: NeonWorkloadUtils.hpp:63
armnn::LayerType
LayerType
When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:491
armnn::optimizations::OptimizeConsecutiveReshapes
OptimizeForConnection< ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl > OptimizeConsecutiveReshapes
Definition: OptimizeConsecutiveReshapes.hpp:61
armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
Definition: ArmComputeUtils.hpp:194
armnn::TensorShape::GetNumElements
unsigned int GetNumElements() const
Function that calculates the number of tensor elements by multiplying all dimension sizes which are Specified.
Definition: Tensor.cpp:181
armnn::ScatterOperation
float ScatterOperation(ScatterNdFunction operation, float input, float update)
Definition: ScatterNd.cpp:18
armnn::CheckScaleSetOnQuantizedType
bool CheckScaleSetOnQuantizedType(Layer *layer, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:801
armnn::OptionalReferenceSwitch< std::is_reference< T >::value, T >::value
const T & value() const
Definition: Optional.hpp:146
armnn::ConvertActivationFunctionToAclActivationFunction
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction)
Definition: ArmComputeUtils.hpp:62
armnn::PaddingMode::Constant
@ Constant
armnn::ComputeAclAxis
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank).
Definition: ArmComputeUtils.hpp:246
armnn::Pooling2dDescriptor::m_PoolType
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
Definition: Descriptors.hpp:405
armnn::Status::Failure
@ Failure
armnn::WrapClError
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
Definition: ClWorkloadUtils.hpp:159
armnn::Gather
void Gather(const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis_int)
Definition: Gather.cpp:15
armnn::OptionalBase::has_value
bool has_value() const noexcept
Definition: Optional.hpp:53
armnn::LayerType::Output
@ Output
armnn::LayerType::Constant
@ Constant
armnn::GetCapability
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The returned BackendCapability must then be inspected to check whether it is supported.
Definition: BackendHelper.cpp:37
armnn::DataLayout::NCHW
@ NCHW
armnn::IOptimizedNetwork::pOptimizedNetworkImpl
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:953
armnn::AssignBackendsIConnectable
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:1073
armnn::ActivationFunction::Sigmoid
@ Sigmoid
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
Definition: Exceptions.hpp:210
armnn::HasMatchingCapability
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.
Definition: BackendHelper.cpp:85
VectorBatchVectorAssign
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
Definition: LstmUtils.cpp:113
armnn::SpaceToDepthDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:1095
armnn::ComparisonOperation::Greater
@ Greater
armnn::DetectionPostProcess
void DetectionPostProcess(const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &, const TensorInfo &detectionBoxesInfo, const TensorInfo &, const TensorInfo &, const TensorInfo &, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
Definition: DetectionPostProcess.cpp:139
armnn::NextIndex
bool NextIndex(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
Definition: Reduce.cpp:19