ArmNN 24.02
armnn Namespace Reference

Copyright (c) 2021 ARM Limited and Contributors. More...

Namespaces

 experimental
 
 optimizations
 
 profiling
 
 stringUtils
 
 timelinedecoder
 
 utility
 

Classes

struct  abs
 
class  AbsLayer
 
struct  AbsQueueDescriptor
 
struct  ActivationDescriptor
 An ActivationDescriptor for the ActivationLayer. More...
 
class  ActivationLayer
 This layer represents an activation operation with the specified activation function. More...
 
struct  ActivationQueueDescriptor
 
class  AddedLayerObservable
 
class  AdditionLayer
 This layer represents an addition operation. More...
 
struct  AdditionQueueDescriptor
 
struct  Allocator
 
struct  ArgMinMaxDescriptor
 An ArgMinMaxDescriptor for ArgMinMaxLayer. More...
 
class  ArgMinMaxLayer
 This layer represents an ArgMinMax operation. More...
 
struct  ArgMinMaxQueueDescriptor
 
class  ArmNNProfilingServiceInitialiser
 
class  BackendCapabilityException
 
class  BackendId
 
struct  BackendOptions
 Struct for users to pass backend-specific options. More...
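
A minimal sketch of how backend-specific options are grouped (the option names and the AddModelOption remark are assumptions about a typical setup, not taken from this page):

    #include <armnn/BackendOptions.hpp>

    // Sketch: group backend-specific options under one BackendId.
    armnn::BackendOptions MakeGpuAccOptions()
    {
        armnn::BackendOptions gpuAccOptions("GpuAcc",
        {
            { "FastMathEnabled", true },   // BackendOptions::Var wraps bool/int/unsigned/float/string
            { "TuningLevel", 2 }
        });
        // Such objects are typically collected into ModelOptions / NetworkOptions
        // (std::vector<BackendOptions>), e.g. via OptimizerOptionsOpaque::AddModelOption().
        return gpuAccOptions;
    }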
 
class  BackendRegistry
 
struct  BackendSettings
 
class  BackendUnavailableException
 Class for non-fatal exceptions raised while initialising a backend. More...
 
struct  BackendVersion
 
class  BadOptionalAccessException
 
struct  BaseDescriptor
 Base class for all descriptors. More...
 
class  BaseIterator
 
class  BaseMemoryManager
 
class  BaseTensor
 
class  BaseWorkload
 
class  BatchMatMul
 
struct  BatchMatMulDescriptor
 A BatchMatMulDescriptor for the BatchMatMul operator. More...
 
class  BatchMatMulLayer
 
struct  BatchMatMulQueueDescriptor
 
struct  BatchNormalizationDescriptor
 A BatchNormalizationDescriptor for the BatchNormalizationLayer. More...
 
class  BatchNormalizationLayer
 This layer represents a batch normalization operation. More...
 
struct  BatchNormalizationQueueDescriptor
 
struct  BatchToSpaceNdDescriptor
 A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer. More...
 
class  BatchToSpaceNdLayer
 This layer represents a BatchToSpaceNd operation. More...
 
struct  BatchToSpaceNdQueueDescriptor
 
class  BFloat16
 
struct  BiasAndWeightsTypesCompatible
 
struct  BiasAndWeightsTypesMatch
 
class  BindableLayer
 
class  BooleanDecoder
 
class  BooleanDecoderBool
 
class  BooleanEncoder
 
struct  BroadcastLoop
 
struct  BroadcastToDescriptor
 
class  BroadcastToLayer
 
struct  BroadcastToQueueDescriptor
 
struct  BufferStorage
 
struct  Capability
 Capability of the TensorHandleFactory. More...
 
class  CastLayer
 This layer represents a cast operation. More...
 
struct  CastQueueDescriptor
 
struct  ceil
 
struct  ChannelShuffleDescriptor
 A ChannelShuffleDescriptor for the ChannelShuffle operator. More...
 
class  ChannelShuffleLayer
 
struct  ChannelShuffleQueueDescriptor
 
struct  CheckLocation
 
class  ClAbsWorkload
 
class  ClActivationWorkload
 
class  ClAdditionWorkload
 
class  ClArgMinMaxWorkload
 
class  ClBackend
 
class  ClBackendContext
 
class  ClBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  ClBackendModelContext
 The ClBackendModelContext is used to pass in CL specific backend ModelOptions. More...
 
class  ClBaseWorkload
 
class  ClBatchMatMulWorkload
 
class  ClBatchNormalizationFloatWorkload
 
class  ClBatchToSpaceNdWorkload
 
class  ClCastWorkload
 
class  ClChannelShuffleWorkload
 
class  ClComparisonWorkload
 
class  ClConcatWorkload
 
class  ClConstantWorkload
 
struct  ClContextBuilder
 
class  ClContextControl
 
class  ClContextDeserializer
 
class  ClContextSerializer
 
class  ClConvertFp16ToFp32Workload
 
class  ClConvertFp32ToFp16Workload
 
class  ClConvolution2dWorkload
 
class  ClConvolution3dWorkload
 
class  ClDepthToSpaceWorkload
 
class  ClDepthwiseConvolutionWorkload
 
class  ClDequantizeWorkload
 
class  ClDivisionWorkload
 
class  ClElementwiseBinaryWorkload
 
class  ClExpWorkload
 
class  ClFillWorkload
 
class  ClFloorFloatWorkload
 
class  ClFullyConnectedWorkload
 
class  ClGatherNdWorkload
 
class  ClGatherWorkload
 
class  ClImportSubTensorHandle
 
class  ClImportTensorHandle
 
class  ClImportTensorHandleFactory
 This factory creates ClImportTensorHandles that refer to imported memory tensors. More...
 
class  ClInstanceNormalizationWorkload
 
class  ClL2NormalizationFloatWorkload
 
class  ClLayerSupport
 
class  ClLogicalAndWorkload
 
class  ClLogicalNotWorkload
 
class  ClLogicalOrWorkload
 
class  ClLogSoftmaxWorkload
 
class  ClLogWorkload
 
class  ClLstmFloatWorkload
 
class  ClMaximumWorkload
 
class  ClMeanWorkload
 
class  ClMemoryManager
 
class  ClMinimumWorkload
 
class  ClMultiplicationWorkload
 
class  ClNegWorkload
 
class  ClNormalizationFloatWorkload
 
class  ClPadWorkload
 
class  ClPermuteWorkload
 
class  ClPooling2dWorkload
 
class  ClPooling3dWorkload
 
class  ClPreluWorkload
 
class  ClQLstmWorkload
 
class  ClQuantizedLstmWorkload
 
class  ClQuantizeWorkload
 
struct  ClRankWorkload
 
class  ClReduceWorkload
 
class  ClReshapeWorkload
 
class  ClResizeWorkload
 
class  ClReverseV2Workload
 
class  ClRsqrtWorkload
 
class  ClRuntimeUnavailableException
 
class  ClSinWorkload
 
class  ClSliceWorkload
 
class  ClSoftmaxWorkload
 
class  ClSpaceToBatchNdWorkload
 
class  ClSpaceToDepthWorkload
 
class  ClSplitterWorkload
 
class  ClSqrtWorkload
 
class  ClStackWorkload
 
class  ClStridedSliceWorkload
 
class  ClSubTensorHandle
 
class  ClSubtractionWorkload
 
class  ClTensorDecorator
 ClTensorDecorator wraps an existing CL tensor allowing us to override the TensorInfo for it. More...
 
class  ClTensorHandle
 
class  ClTensorHandleDecorator
 
class  ClTensorHandleFactory
 
class  ClTileWorkload
 
class  ClTransposeConvolution2dWorkload
 
class  ClTransposeWorkload
 
class  ClTunedParameters
 
class  ClUnidirectionalSequenceLstmFloatWorkload
 
class  ClWorkloadFactory
 
struct  ComparisonDescriptor
 A ComparisonDescriptor for the ComparisonLayer. More...
 
class  ComparisonLayer
 This layer represents a comparison operation. More...
 
struct  ComparisonQueueDescriptor
 
class  ConcatLayer
 This layer represents a merge (concatenation) operation. More...
 
struct  ConcatQueueDescriptor
 
class  ConstantLayer
 A layer that constant data can be bound to. More...
 
class  ConstantMemoryStrategy
 
struct  ConstantQueueDescriptor
 
class  ConstPassthroughTensorHandle
 
struct  ConstructInPlace
 Disambiguation tag that can be passed to the constructor to indicate that the contained object should be constructed in-place. More...
 
class  ConstTensor
 A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. More...
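
A minimal sketch of wrapping existing data in a ConstTensor (recent ArmNN releases require the TensorInfo to be flagged constant; the shape and data type here are illustrative):

    #include <armnn/Tensor.hpp>
    #include <vector>

    armnn::ConstTensor MakeWeights(const std::vector<float>& weightsData)   // expects 2*3 = 6 floats
    {
        armnn::TensorInfo weightsInfo({ 2, 3 }, armnn::DataType::Float32);
        weightsInfo.SetConstant(true);                        // ConstTensor expects a constant TensorInfo
        return armnn::ConstTensor(weightsInfo, weightsData);  // no copy: weightsData must outlive the tensor
    }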
 
class  ConstTensorHandle
 
class  ConvertFp16ToFp32Layer
 This layer converts data type Float 16 to Float 32. More...
 
struct  ConvertFp16ToFp32QueueDescriptor
 
class  ConvertFp32ToFp16Layer
 This layer converts data type Float 32 to Float 16. More...
 
struct  ConvertFp32ToFp16QueueDescriptor
 
struct  Convolution2dDescriptor
 A Convolution2dDescriptor for the Convolution2dLayer. More...
 
class  Convolution2dLayer
 This layer represents a convolution 2d operation. More...
 
struct  Convolution2dQueueDescriptor
 
struct  Convolution3dDescriptor
 A Convolution3dDescriptor for the Convolution3dLayer. More...
 
class  Convolution3dLayer
 This layer represents a convolution 3d operation. More...
 
struct  Convolution3dQueueDescriptor
 
class  CopyMemGenericWorkload
 
class  DebugLayer
 This layer visualizes the data flowing through the network. More...
 
struct  DebugQueueDescriptor
 
class  Decoder
 
class  DefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  DepthToSpaceLayer
 This layer represents a DepthToSpace operation. More...
 
struct  DepthToSpaceQueueDescriptor
 
struct  DepthwiseConvolution2dDescriptor
 A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer. More...
 
class  DepthwiseConvolution2dLayer
 This layer represents a depthwise convolution 2d operation. More...
 
struct  DepthwiseConvolution2dQueueDescriptor
 Depthwise Convolution 2D layer workload data. More...
 
class  DequantizeLayer
 This layer dequantizes the input tensor. More...
 
struct  DequantizeQueueDescriptor
 
struct  DetectionPostProcessDescriptor
 
class  DetectionPostProcessLayer
 This layer represents a detection postprocess operator. More...
 
struct  DetectionPostProcessQueueDescriptor
 
class  DeviceSpec
 
class  DivisionLayer
 This layer represents a division operation. More...
 
struct  DivisionQueueDescriptor
 
class  DotAttributeSet
 
class  DotBase
 
class  DotDefaults
 
class  DotEdge
 
class  DotGraph
 
class  DotNode
 
class  DynamicBackend
 
class  DynamicBackendUtils
 
class  ElementwiseBaseLayer
 NOTE: this is an abstract class that encapsulates the elementwise operations; it does not implement std::unique_ptr<IWorkload> Layer::CreateWorkload(const IWorkloadFactory& factory) const = 0 or Layer* Clone(Graph& graph) const = 0. More...
 
struct  ElementwiseBinaryDescriptor
 An ElementwiseBinaryDescriptor for the ElementwiseBinaryLayer. More...
 
struct  ElementwiseBinaryFunction
 
class  ElementwiseBinaryLayer
 This layer represents an ElementwiseBinary operation. More...
 
struct  ElementwiseBinaryQueueDescriptor
 
struct  ElementwiseUnaryDescriptor
 An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. More...
 
struct  ElementwiseUnaryFunction
 
class  ElementwiseUnaryLayer
 This layer represents an ElementwiseUnary operation. More...
 
struct  ElementwiseUnaryQueueDescriptor
 
struct  EmptyOptional
 EmptyOptional is used to initialize the Optional class when we want to have a default value for an Optional in a function declaration. More...
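
A minimal sketch (the functions below are hypothetical) of EmptyOptional as the "no value supplied" default for an Optional parameter, including the reference form used by Optimize():

    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>
    #include <string>

    void CheckQuantized(const armnn::TensorInfo& info,
                        armnn::Optional<std::string&> reason = armnn::EmptyOptional())
    {
        if (!info.IsQuantized() && reason.has_value())
        {
            reason.value() = "expected a quantized tensor";   // written only when a reference was supplied
        }
    }

    void Caller(const armnn::TensorInfo& info)
    {
        CheckQuantized(info);              // default EmptyOptional: no reason requested
        std::string reason;
        CheckQuantized(info, reason);      // Optional<std::string&> binds to 'reason'
    }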
 
class  Encoder
 
struct  EqualQueueDescriptor
 
class  ErasedLayerNamesObservable
 
class  Event
 Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Event::GetMeasurements() is called. More...
 
class  Exception
 Base class for all ArmNN exceptions so that users can filter to just those. More...
 
class  ExecutionFrame
 
struct  exp
 
struct  FakeQuantizationDescriptor
 A FakeQuantizationDescriptor for the FakeQuantizationLayer. More...
 
class  FakeQuantizationLayer
 This layer represents a fake quantization operation. More...
 
struct  FakeQuantizationQueueDescriptor
 
class  FileNotFoundException
 
struct  FillDescriptor
 A FillDescriptor for the FillLayer. More...
 
class  FillLayer
 This layer represents a fill operation. More...
 
struct  FillQueueDescriptor
 
class  FirstInputTypedWorkload
 
struct  FLATBUFFERS_FINAL_CLASS
 
class  Float16Decoder
 
class  Float16Encoder
 
class  Float32Decoder
 
class  Float32Encoder
 
class  FloorLayer
 This layer represents a floor operation. More...
 
struct  FloorQueueDescriptor
 
struct  FullyConnectedDescriptor
 A FullyConnectedDescriptor for the FullyConnectedLayer. More...
 
class  FullyConnectedLayer
 This layer represents a fully connected operation. More...
 
struct  FullyConnectedQueueDescriptor
 
struct  FusedDescriptor
 A FusedDescriptor for the FusedLayer. More...
 
class  FusedLayer
 
struct  FusedQueueDescriptor
 
struct  GatherDescriptor
 A GatherDescriptor for the GatherLayer. More...
 
class  GatherLayer
 This layer represents a Gather operator. More...
 
class  GatherNdLayer
 This layer represents a GatherNd operator. More...
 
struct  GatherNdQueueDescriptor
 
struct  GatherQueueDescriptor
 
class  GpuFsaBackend
 
class  GpuFsaBackendContext
 
class  GpuFsaBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  GpuFsaBaseWorkload
 
class  GpuFsaConstantWorkload
 
class  GpuFsaContextControl
 
class  GpuFsaLayerSupport
 
class  GpuFsaMemoryManager
 
struct  GpuFsaPreCompiledBlob
 A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend. More...
 
class  GpuFsaPreCompiledWorkload
 
class  GpuFsaSubTensorHandle
 
class  GpuFsaTensorHandle
 
class  GpuFsaTensorHandleFactory
 
class  GpuFsaWorkloadFactory
 
class  Graph
 
class  GraphObservable
 
class  GraphValidationException
 
struct  GreaterQueueDescriptor
 
class  HtmlBold
 
class  HtmlFont
 
class  HtmlSection
 
class  HtmlSimpleTag
 
class  IAclTensorHandle
 
class  IBackend
 Each backend should implement an IBackend. More...
 
class  IBackendContext
 
class  IBackendInternal
 
class  IBackendModelContext
 
class  IClTensorHandle
 
class  ICLTensorProxy
 
class  IConnectableLayer
 Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. More...
 
class  ICustomAllocator
 Custom Allocator interface. More...
 
class  IDeviceSpec
 Device specific knowledge to be passed to the optimizer. More...
 
class  IExecutionFrame
 ExecutionFrame interface to enqueue a workload computation. More...
 
class  IGpuAccTunedParameters
 Manages a set of GpuAcc parameters which have been tuned for maximum performance. More...
 
class  IGraphObservable
 
class  IInputSlot
 An input connection slot for a layer. More...
 
class  ILayerSupport
 
class  IMemoryManager
 
class  IMemoryOptimizerStrategy
 
struct  IMemoryOptimizerStrategyFactory
 
class  ImportMemGenericWorkload
 
class  INetwork
 Main network class which provides the interface for building up a neural network. More...
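
A minimal sketch of building a network through INetwork (layer choice and tensor shape are illustrative): add layers, set a TensorInfo on each output slot, and connect the slots.

    #include <armnn/INetwork.hpp>
    #include <armnn/Descriptors.hpp>

    armnn::INetworkPtr BuildReluNetwork()
    {
        armnn::INetworkPtr network = armnn::INetwork::Create();

        armnn::IConnectableLayer* input = network->AddInputLayer(0, "input");

        armnn::ActivationDescriptor reluDesc;
        reluDesc.m_Function = armnn::ActivationFunction::ReLu;
        armnn::IConnectableLayer* relu = network->AddActivationLayer(reluDesc, "relu");

        armnn::IConnectableLayer* output = network->AddOutputLayer(0, "output");

        armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32);
        input->GetOutputSlot(0).SetTensorInfo(info);
        relu->GetOutputSlot(0).SetTensorInfo(info);

        input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
        relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        return network;
    }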
 
struct  INetworkProperties
 
class  InputLayer
 A layer that user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  InputSlot
 
struct  InstanceNormalizationDescriptor
 An InstanceNormalizationDescriptor for InstanceNormalizationLayer. More...
 
class  InstanceNormalizationLayer
 This layer represents an instance normalization operation. More...
 
struct  InstanceNormalizationQueueDescriptor
 
class  Instrument
 
class  Int32Decoder
 
class  Int32Encoder
 
class  Int32ToInt32tDecoder
 
class  Int32ToInt32tEncoder
 
class  Int64Decoder
 
class  Int64Encoder
 
class  InvalidArgumentException
 
class  IOptimizedNetwork
 
class  IOutputSlot
 An output connection slot for a layer. More...
 
class  IProfiler
 
class  IRuntime
 
struct  IsHalfType
 
struct  IsMemorySource
 
struct  IsMemorySource< MemorySource >
 
class  IStrategy
 
class  ISubgraphViewConverter
 
class  ITensorHandle
 
class  ITensorHandleFactory
 
class  IWorkload
 Workload interface to enqueue a layer computation. More...
 
class  IWorkloadFactory
 
struct  JsonChildObject
 
class  JsonPrinter
 
class  JsonUtils
 
struct  L2NormalizationDescriptor
 An L2NormalizationDescriptor for the L2NormalizationLayer. More...
 
class  L2NormalizationLayer
 This layer represents an L2 normalization operation. More...
 
struct  L2NormalizationQueueDescriptor
 
class  Layer
 
class  LayerSupportBase
 
class  LayerSupportHandle
 
struct  LayerTypeOfImpl
 
struct  LayerTypeOfImpl< LayerType::Activation >
 
struct  LayerTypeOfImpl< LayerType::Addition >
 
struct  LayerTypeOfImpl< LayerType::ArgMinMax >
 
struct  LayerTypeOfImpl< LayerType::BatchMatMul >
 
struct  LayerTypeOfImpl< LayerType::BatchNormalization >
 
struct  LayerTypeOfImpl< LayerType::BatchToSpaceNd >
 
struct  LayerTypeOfImpl< LayerType::BroadcastTo >
 
struct  LayerTypeOfImpl< LayerType::Cast >
 
struct  LayerTypeOfImpl< LayerType::ChannelShuffle >
 
struct  LayerTypeOfImpl< LayerType::Comparison >
 
struct  LayerTypeOfImpl< LayerType::Concat >
 
struct  LayerTypeOfImpl< LayerType::Constant >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToFp16 >
 
struct  LayerTypeOfImpl< LayerType::Convolution2d >
 
struct  LayerTypeOfImpl< LayerType::Convolution3d >
 
struct  LayerTypeOfImpl< LayerType::Debug >
 
struct  LayerTypeOfImpl< LayerType::DepthToSpace >
 
struct  LayerTypeOfImpl< LayerType::DepthwiseConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::Dequantize >
 
struct  LayerTypeOfImpl< LayerType::DetectionPostProcess >
 
struct  LayerTypeOfImpl< LayerType::Division >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseBinary >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseUnary >
 
struct  LayerTypeOfImpl< LayerType::FakeQuantization >
 
struct  LayerTypeOfImpl< LayerType::Fill >
 
struct  LayerTypeOfImpl< LayerType::Floor >
 
struct  LayerTypeOfImpl< LayerType::FullyConnected >
 
struct  LayerTypeOfImpl< LayerType::Fused >
 
struct  LayerTypeOfImpl< LayerType::Gather >
 
struct  LayerTypeOfImpl< LayerType::GatherNd >
 
struct  LayerTypeOfImpl< LayerType::Input >
 
struct  LayerTypeOfImpl< LayerType::InstanceNormalization >
 
struct  LayerTypeOfImpl< LayerType::L2Normalization >
 
struct  LayerTypeOfImpl< LayerType::LogicalBinary >
 
struct  LayerTypeOfImpl< LayerType::LogSoftmax >
 
struct  LayerTypeOfImpl< LayerType::Lstm >
 
struct  LayerTypeOfImpl< LayerType::Map >
 
struct  LayerTypeOfImpl< LayerType::Maximum >
 
struct  LayerTypeOfImpl< LayerType::Mean >
 
struct  LayerTypeOfImpl< LayerType::MemCopy >
 
struct  LayerTypeOfImpl< LayerType::MemImport >
 
struct  LayerTypeOfImpl< LayerType::Merge >
 
struct  LayerTypeOfImpl< LayerType::Minimum >
 
struct  LayerTypeOfImpl< LayerType::Multiplication >
 
struct  LayerTypeOfImpl< LayerType::Normalization >
 
struct  LayerTypeOfImpl< LayerType::Output >
 
struct  LayerTypeOfImpl< LayerType::Pad >
 
struct  LayerTypeOfImpl< LayerType::Permute >
 
struct  LayerTypeOfImpl< LayerType::Pooling2d >
 
struct  LayerTypeOfImpl< LayerType::Pooling3d >
 
struct  LayerTypeOfImpl< LayerType::PreCompiled >
 
struct  LayerTypeOfImpl< LayerType::Prelu >
 
struct  LayerTypeOfImpl< LayerType::QLstm >
 
struct  LayerTypeOfImpl< LayerType::Quantize >
 
struct  LayerTypeOfImpl< LayerType::QuantizedLstm >
 
struct  LayerTypeOfImpl< LayerType::Rank >
 
struct  LayerTypeOfImpl< LayerType::Reduce >
 
struct  LayerTypeOfImpl< LayerType::Reshape >
 
struct  LayerTypeOfImpl< LayerType::Resize >
 
struct  LayerTypeOfImpl< LayerType::ReverseV2 >
 
struct  LayerTypeOfImpl< LayerType::Shape >
 
struct  LayerTypeOfImpl< LayerType::Slice >
 
struct  LayerTypeOfImpl< LayerType::Softmax >
 
struct  LayerTypeOfImpl< LayerType::SpaceToBatchNd >
 
struct  LayerTypeOfImpl< LayerType::SpaceToDepth >
 
struct  LayerTypeOfImpl< LayerType::Splitter >
 
struct  LayerTypeOfImpl< LayerType::Stack >
 
struct  LayerTypeOfImpl< LayerType::StandIn >
 
struct  LayerTypeOfImpl< LayerType::StridedSlice >
 
struct  LayerTypeOfImpl< LayerType::Subtraction >
 
struct  LayerTypeOfImpl< LayerType::Switch >
 
struct  LayerTypeOfImpl< LayerType::Tile >
 
struct  LayerTypeOfImpl< LayerType::Transpose >
 
struct  LayerTypeOfImpl< LayerType::TransposeConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::UnidirectionalSequenceLstm >
 
struct  LayerTypeOfImpl< LayerType::Unmap >
 
class  LayerValidationException
 
class  LayerWithParameters
 
class  LoadedNetwork
 
struct  log
 
struct  LogicalBinaryDescriptor
 A LogicalBinaryDescriptor for the LogicalBinaryLayer. More...
 
struct  LogicalBinaryFunction
 
class  LogicalBinaryLayer
 This layer represents a Logical Binary operation. More...
 
struct  LogicalBinaryQueueDescriptor
 
struct  LogicalUnaryFunction
 
class  LogSink
 
class  LogSoftmaxLayer
 This layer represents a log softmax operation. More...
 
struct  LogSoftmaxQueueDescriptor
 
struct  LstmBasicParameters
 
struct  LstmDescriptor
 An LstmDescriptor for the LstmLayer. More...
 
struct  LstmInputParams
 
struct  LstmInputParamsInfo
 
class  LstmLayer
 This layer represents an LSTM operation. More...
 
struct  LstmOptCifgParameters
 
struct  LstmOptLayerNormParameters
 
struct  LstmOptPeepholeParameters
 
struct  LstmOptProjectionParameters
 
struct  LstmQueueDescriptor
 
class  ManagedConstTensorHandle
 
class  MapLayer
 This layer represents a memory copy operation. More...
 
struct  MapQueueDescriptor
 
class  MapWorkload
 
struct  maximum
 
class  MaximumLayer
 This layer represents a maximum operation. More...
 
struct  MaximumQueueDescriptor
 
struct  MeanDescriptor
 A MeanDescriptor for the MeanLayer. More...
 
class  MeanLayer
 This layer represents a mean operation. More...
 
struct  MeanQueueDescriptor
 
struct  Measurement
 
struct  MemBin
 
struct  MemBlock
 
class  MemCopyLayer
 This layer represents a memory copy operation. More...
 
struct  MemCopyQueueDescriptor
 
class  MemImportLayer
 This layer represents a memory import operation. More...
 
struct  MemImportQueueDescriptor
 
class  MemoryExportException
 
class  MemoryImportException
 
struct  MemoryInfo
 
class  MemoryManager
 
struct  MemoryRequirements
 
class  MemoryValidationException
 
struct  MemSyncQueueDescriptor
 
class  MergeLayer
 This layer represents a merge operation. More...
 
struct  MergeQueueDescriptor
 
struct  minimum
 
class  MinimumLayer
 This layer represents a minimum operation. More...
 
struct  MinimumQueueDescriptor
 
class  MockTensorHandleFactory
 
class  MultiplicationLayer
 This layer represents a multiplication operation. More...
 
struct  MultiplicationQueueDescriptor
 
class  MultiTypedWorkload
 
class  NeonAbsWorkload
 
class  NeonActivationWorkload
 
class  NeonAdditionWorkload
 
class  NeonArgMinMaxWorkload
 
class  NeonBackend
 
class  NeonBackendModelContext
 The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions. More...
 
class  NeonBaseWorkload
 
class  NeonBatchMatMulWorkload
 
class  NeonBatchNormalizationWorkload
 
class  NeonBatchToSpaceNdWorkload
 
class  NeonCastWorkload
 
class  NeonChannelShuffleWorkload
 
class  NeonComparisonWorkload
 
class  NeonConcatWorkload
 
class  NeonConstantWorkload
 
class  NeonConvertFp16ToFp32Workload
 
class  NeonConvertFp32ToFp16Workload
 
class  NeonConvolution2dWorkload
 
class  NeonConvolution3dWorkload
 
class  NeonDepthToSpaceWorkload
 
class  NeonDepthwiseConvolutionWorkload
 
class  NeonDequantizeWorkload
 
class  NeonDetectionPostProcessWorkload
 
class  NeonDivisionWorkload
 
class  NeonElementwiseBinaryWorkload
 
class  NeonExpWorkload
 
class  NeonFillWorkload
 
class  NeonFloorFloatWorkload
 
class  NeonFullyConnectedWorkload
 
class  NeonFusedWorkload
 
class  NeonGatherNdWorkload
 
class  NeonGatherWorkload
 
class  NeonInstanceNormalizationWorkload
 
class  NeonInterceptorScheduler
 
class  NeonL2NormalizationFloatWorkload
 
class  NeonLayerSupport
 
class  NeonLogicalAndWorkload
 
class  NeonLogicalNotWorkload
 
class  NeonLogicalOrWorkload
 
class  NeonLogSoftmaxWorkload
 
class  NeonLogWorkload
 
class  NeonLstmFloatWorkload
 
class  NeonMaximumWorkload
 
class  NeonMeanWorkload
 
class  NeonMemoryManager
 
class  NeonMinimumWorkload
 
class  NeonMultiplicationWorkload
 
class  NeonNegWorkload
 
class  NeonNormalizationFloatWorkload
 
class  NeonPadWorkload
 
class  NeonPermuteWorkload
 
class  NeonPooling2dWorkload
 
class  NeonPooling3dWorkload
 
class  NeonPreluWorkload
 
class  NeonQLstmWorkload
 
class  NeonQuantizedLstmWorkload
 
class  NeonQuantizeWorkload
 
struct  NeonRankWorkload
 
class  NeonReduceWorkload
 
class  NeonReshapeWorkload
 
class  NeonResizeWorkload
 
class  NeonReverseV2Workload
 
class  NeonRsqrtWorkload
 
class  NeonSinWorkload
 
class  NeonSliceWorkload
 
class  NeonSoftmaxWorkload
 
class  NeonSpaceToBatchNdWorkload
 
class  NeonSpaceToDepthWorkload
 
class  NeonSplitterWorkload
 
class  NeonSqrtWorkload
 
class  NeonStackWorkload
 
class  NeonStridedSliceWorkload
 
class  NeonSubTensorHandle
 
class  NeonSubtractionWorkload
 
class  NeonTensorDecorator
 NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it. More...
 
class  NeonTensorHandle
 
class  NeonTensorHandleDecorator
 
class  NeonTensorHandleFactory
 
class  NeonTileWorkload
 
class  NeonTimer
 
class  NeonTransposeConvolution2dWorkload
 
class  NeonTransposeWorkload
 
class  NeonUnidirectionalSequenceLstmFloatWorkload
 
class  NeonUnidirectionalSequenceLstmWorkload
 
class  NeonWorkloadFactory
 
class  NetworkImpl
 Private implementation of INetwork. More...
 
class  NodeContent
 
struct  NormalizationDescriptor
 A NormalizationDescriptor for the NormalizationLayer. More...
 
class  NormalizationLayer
 This layer represents a normalization operation. More...
 
struct  NormalizationQueueDescriptor
 
struct  NoThrowStrategy
 
struct  NullDescriptor
 Null Descriptor used as a return value from the IConnectableLayer GetParameters method by layers which do not have a descriptor. More...
 
class  NullPointerException
 
class  NullWorkload
 
class  OpenClTimer
 OpenClTimer instrument that times all OpenCL kernels executed between calls to Start() and Stop(). More...
 
class  Optimization
 
struct  OptimizationResult
 
class  OptimizationViews
 
class  OptimizedNetworkImpl
 
class  OptimizeForConnection
 
class  OptimizeForConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForExclusiveConnection
 
class  OptimizeForExclusiveConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForType
 
class  OptimizeForTypeImpl
 Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType. More...
 
class  OptimizeForTypeImpl< Layer, Wrapped >
 Specialization that calls Wrapped::Run() for any layer type. More...
 
class  Optimizer
 
struct  OptimizerOptions
 
class  OptimizerOptionsOpaque
 
struct  OptimizerOptionsOpaqueImpl
 
class  Optional
 
class  OptionalBase
 OptionalBase is the common functionality between reference and non-reference optional types. More...
 
class  OptionalReferenceSwitch
 The default implementation is the non-reference case. More...
 
class  OptionalReferenceSwitch< true, T >
 This is the special case for reference types. More...
 
struct  OriginsDescriptor
 An OriginsDescriptor for the ConcatLayer. More...
 
class  OutputHandler
 
class  OutputLayer
 A layer that user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  OutputSlot
 
struct  PadDescriptor
 A PadDescriptor for the PadLayer. More...
 
class  PadLayer
 This layer represents a pad operation. More...
 
struct  PadQueueDescriptor
 
class  ParseException
 
class  PassthroughTensorHandle
 
class  PerAxisIterator
 PerAxisIterator for per-axis quantization. More...
 
class  PermutationVector
 
struct  PermuteDescriptor
 A PermuteDescriptor for the PermuteLayer. More...
 
class  PermuteLayer
 This layer represents a permutation operation. More...
 
struct  PermuteQueueDescriptor
 
class  PolymorphicDowncastException
 
struct  Pooling2dDescriptor
 A Pooling2dDescriptor for the Pooling2dLayer. More...
 
class  Pooling2dLayer
 This layer represents a pooling 2d operation. More...
 
struct  Pooling2dQueueDescriptor
 
struct  Pooling3dDescriptor
 A Pooling3dDescriptor for the Pooling3dLayer. More...
 
class  Pooling3dLayer
 This layer represents a pooling 3d operation. More...
 
struct  Pooling3dQueueDescriptor
 
struct  power
 
struct  PreCompiledDescriptor
 A PreCompiledDescriptor for the PreCompiledLayer. More...
 
class  PreCompiledLayer
 
struct  PreCompiledQueueDescriptor
 
class  PreluLayer
 
struct  PreluQueueDescriptor
 
class  ProfilerImpl
 
class  ProfilerManager
 
class  ProfilingDetails
 ProfilingDetails class records any details associated with the operator and passes on for outputting to the user. More...
 
struct  ProgramBuilder
 
class  QASymm8Decoder
 
class  QASymm8Encoder
 
class  QASymmS8Decoder
 
class  QASymmS8Encoder
 
struct  QLstmBasicParameters
 
struct  QLstmDescriptor
 A QLstmDescriptor for the QLstmLayer. More...
 
class  QLstmLayer
 This layer represents a QLstm operation. More...
 
struct  QLstmOptCifgParameters
 
struct  QLstmOptLayerNormParameters
 
struct  QLstmOptPeepholeParameters
 
struct  QLstmOptProjectionParameters
 
struct  QLstmQueueDescriptor
 
class  QSymm16Decoder
 
class  QSymm16Encoder
 
class  QSymm16PerAxisEncoder
 
class  QSymm8PerAxisDecoder
 
class  QSymm8PerAxisEncoder
 
class  QSymmS8Decoder
 
class  QSymmS8Encoder
 
struct  QuantizationParametersAreEqual
 
struct  QuantizedLstmInputParams
 
struct  QuantizedLstmInputParamsInfo
 
class  QuantizedLstmLayer
 This layer represents a QuantizedLstm operation. More...
 
struct  QuantizedLstmParameters
 
struct  QuantizedLstmQueueDescriptor
 
struct  QuantizedMultiplierSmallerThanOne
 Performs multiplication of an integer with a multiplier which is less than one, using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. More...
 
class  QuantizeLayer
 
struct  QuantizeQueueDescriptor
 
struct  QueueDescriptor
 
struct  QueueDescriptorWithParameters
 
class  RangeTracker
 
class  RankLayer
 
struct  RankQueueDescriptor
 
struct  ReduceDescriptor
 A ReduceDescriptor for the REDUCE operators. More...
 
class  ReduceLayer
 This layer represents a reduction operation. More...
 
struct  ReduceQueueDescriptor
 
class  RefActivationWorkload
 
class  RefArgMinMaxWorkload
 
class  RefBackend
 
class  RefBaseWorkload
 
class  RefBatchMatMulWorkload
 
class  RefBatchNormalizationWorkload
 
class  RefBatchToSpaceNdWorkload
 
class  RefBroadcastToWorkload
 
class  RefCastWorkload
 
class  RefChannelShuffleWorkload
 
class  RefComparisonWorkload
 
class  RefConcatWorkload
 
class  RefConstantWorkload
 
class  RefConvertFp16ToFp32Workload
 
class  RefConvertFp32ToFp16Workload
 
class  RefConvolution2dWorkload
 
class  RefConvolution3dWorkload
 
class  RefDebugWorkload
 
class  RefDepthToSpaceWorkload
 
class  RefDepthwiseConvolution2dWorkload
 
class  RefDequantizeWorkload
 
class  RefDetectionPostProcessWorkload
 
class  RefElementwiseBinaryWorkload
 
class  RefElementwiseUnaryWorkload
 
class  RefElementwiseWorkload
 
class  RefFakeQuantizationFloat32Workload
 
class  RefFillWorkload
 
class  RefFloorWorkload
 
class  RefFullyConnectedWorkload
 
class  RefGatherNdWorkload
 
class  RefGatherWorkload
 
class  RefInstanceNormalizationWorkload
 
class  RefL2NormalizationWorkload
 
class  RefLayerSupport
 
class  RefLogicalBinaryWorkload
 
class  RefLogicalUnaryWorkload
 
class  RefLogSoftmaxWorkload
 
class  RefLstmWorkload
 
class  RefMeanWorkload
 
class  RefMemoryManager
 
class  RefNormalizationWorkload
 
class  RefPadWorkload
 
class  RefPermuteWorkload
 
class  RefPooling2dWorkload
 
class  RefPooling3dWorkload
 
class  RefPreluWorkload
 
class  RefQLstmWorkload
 
class  RefQuantizeWorkload
 
struct  RefRankWorkload
 
class  RefReduceWorkload
 
class  RefReshapeWorkload
 
class  RefResizeWorkload
 
class  RefReverseV2Workload
 
struct  RefShapeWorkload
 
class  RefSliceWorkload
 
class  RefSoftmaxWorkload
 
class  RefSpaceToBatchNdWorkload
 
class  RefSpaceToDepthWorkload
 
class  RefSplitterWorkload
 
class  RefStackWorkload
 
class  RefStridedSliceWorkload
 
class  RefTensorHandle
 
class  RefTensorHandleDecorator
 
class  RefTensorHandleFactory
 
class  RefTileWorkload
 
class  RefTransposeConvolution2dWorkload
 
class  RefTransposeWorkload
 
class  RefUnidirectionalSequenceLstmWorkload
 
class  RefWorkloadFactory
 
struct  ReshapeDescriptor
 A ReshapeDescriptor for the ReshapeLayer. More...
 
class  ReshapeLayer
 This layer represents a reshape operation. More...
 
struct  ReshapeQueueDescriptor
 
struct  ResizeDescriptor
 A ResizeDescriptor for the ResizeLayer. More...
 
class  ResizeLayer
 This layer represents a resize operation. More...
 
struct  ResizeQueueDescriptor
 
struct  ResolveTypeImpl
 
struct  ResolveTypeImpl< DataType::BFloat16 >
 
struct  ResolveTypeImpl< DataType::Boolean >
 
struct  ResolveTypeImpl< DataType::Float16 >
 
struct  ResolveTypeImpl< DataType::Float32 >
 
struct  ResolveTypeImpl< DataType::QAsymmS8 >
 
struct  ResolveTypeImpl< DataType::QAsymmU8 >
 
struct  ResolveTypeImpl< DataType::QSymmS16 >
 
struct  ResolveTypeImpl< DataType::QSymmS8 >
 
struct  ResolveTypeImpl< DataType::Signed32 >
 
struct  ResolveTypeImpl< DataType::Signed64 >
 
class  ReverseV2Layer
 This layer represents a ReverseV2 operation. More...
 
struct  ReverseV2QueueDescriptor
 
struct  rsqrt
 
class  RsqrtLayer
 
struct  RsqrtQueueDescriptor
 
struct  Rule
 
class  RuntimeException
 
struct  RuntimeImpl
 
class  ScaledInt32Decoder
 
class  ScaledInt32PerAxisDecoder
 
class  ScopedProfilingEvent
 
struct  ScopedRecord
 
class  ScopedTensorHandle
 
class  ShapeLayer
 
struct  ShapeQueueDescriptor
 
struct  ShapesAreBroadcastCompatible
 
struct  ShapesAreSameRank
 
struct  ShapesAreSameTotalSize
 
class  SimpleLogger
 
struct  sin
 
class  SingleAxisPriorityList
 SingleAxisPriorityList sorts the MemBlocks according to some priority, then tries to place them into as few bins as possible. More...
 
struct  SliceDescriptor
 A SliceDescriptor for the SliceLayer. More...
 
class  SliceLayer
 
struct  SliceQueueDescriptor
 
struct  SoftmaxDescriptor
 A SoftmaxDescriptor for the SoftmaxLayer. More...
 
class  SoftmaxLayer
 This layer represents a softmax operation. More...
 
struct  SoftmaxQueueDescriptor
 
struct  SpaceToBatchNdDescriptor
 A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer. More...
 
class  SpaceToBatchNdLayer
 This layer represents a SpaceToBatchNd operation. More...
 
struct  SpaceToBatchNdQueueDescriptor
 
struct  SpaceToDepthDescriptor
 A SpaceToDepthDescriptor for the SpaceToDepthLayer. More...
 
class  SpaceToDepthLayer
 This layer represents a SpaceToDepth operation. More...
 
struct  SpaceToDepthQueueDescriptor
 
class  SplitterLayer
 This layer represents a split operation. More...
 
struct  SplitterQueueDescriptor
 
struct  sqrt
 
struct  squaredDifference
 
struct  StackDescriptor
 A StackDescriptor for the StackLayer. More...
 
class  StackLayer
 This layer represents a stack operation. More...
 
struct  StackQueueDescriptor
 
class  StandardOutputSink
 
struct  StandInDescriptor
 A StandInDescriptor for the StandIn layer. More...
 
class  StandInLayer
 This layer represents an unknown operation in the input graph. More...
 
class  StrategyBase
 Strategy base class with empty implementations. More...
 
struct  StrategyFactory
 
class  StrategyValidator
 
struct  StridedSliceDescriptor
 A StridedSliceDescriptor for the StridedSliceLayer. More...
 
class  StridedSliceLayer
 This layer represents a strided slice operation. More...
 
struct  StridedSliceQueueDescriptor
 
struct  StringifyLayerParameters
 StringifyLayerParameters allows serializing layer parameters to string. More...
 
struct  StringifyLayerParameters< ActivationDescriptor >
 
struct  StringifyLayerParameters< BatchMatMulDescriptor >
 
struct  StringifyLayerParameters< BatchNormalizationDescriptor >
 
struct  StringifyLayerParameters< BatchToSpaceNdDescriptor >
 
struct  StringifyLayerParameters< ChannelShuffleDescriptor >
 
struct  StringifyLayerParameters< ComparisonDescriptor >
 
struct  StringifyLayerParameters< Convolution2dDescriptor >
 
struct  StringifyLayerParameters< Convolution3dDescriptor >
 
struct  StringifyLayerParameters< DepthwiseConvolution2dDescriptor >
 
struct  StringifyLayerParameters< DetectionPostProcessDescriptor >
 
struct  StringifyLayerParameters< ElementwiseBinaryDescriptor >
 
struct  StringifyLayerParameters< ElementwiseUnaryDescriptor >
 
struct  StringifyLayerParameters< FakeQuantizationDescriptor >
 
struct  StringifyLayerParameters< FullyConnectedDescriptor >
 
struct  StringifyLayerParameters< FusedDescriptor >
 
struct  StringifyLayerParameters< GatherDescriptor >
 
struct  StringifyLayerParameters< L2NormalizationDescriptor >
 
struct  StringifyLayerParameters< LstmDescriptor >
 
struct  StringifyLayerParameters< MeanDescriptor >
 
struct  StringifyLayerParameters< NormalizationDescriptor >
 
struct  StringifyLayerParameters< OriginsDescriptor >
 
struct  StringifyLayerParameters< PadDescriptor >
 
struct  StringifyLayerParameters< PermuteDescriptor >
 
struct  StringifyLayerParameters< Pooling2dDescriptor >
 
struct  StringifyLayerParameters< Pooling3dDescriptor >
 
struct  StringifyLayerParameters< PreCompiledDescriptor >
 
struct  StringifyLayerParameters< ReduceDescriptor >
 
struct  StringifyLayerParameters< ReshapeDescriptor >
 
struct  StringifyLayerParameters< ResizeDescriptor >
 
struct  StringifyLayerParameters< SoftmaxDescriptor >
 
struct  StringifyLayerParameters< SpaceToBatchNdDescriptor >
 
struct  StringifyLayerParameters< SpaceToDepthDescriptor >
 
struct  StringifyLayerParameters< StackDescriptor >
 
struct  StringifyLayerParameters< StridedSliceDescriptor >
 
struct  StringifyLayerParameters< TileDescriptor >
 
struct  StringifyLayerParameters< TransposeConvolution2dDescriptor >
 
struct  StringifyLayerParameters< TransposeDescriptor >
 
struct  StringifyLayerParameters< ViewsDescriptor >
 
struct  StringMapping
 StringMapping is a helper class that allows strings to be used as template parameters, simplifying code which only differs in a string, such as a debug string literal. More...
 
class  SubgraphView
 The SubgraphView class represents a subgraph of a Graph. More...
 
class  SubgraphViewSelector
 Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. More...
 
class  SubtractionLayer
 This layer represents a subtraction operation. More...
 
struct  SubtractionQueueDescriptor
 
class  SwitchLayer
 This layer calculates both the true and false outputs for the input. More...
 
struct  SwitchQueueDescriptor
 
class  SyncMemGenericWorkload
 
class  Tensor
 A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. More...
 
class  TensorBufferArrayView
 
class  TensorHandle
 
class  TensorHandleFactoryRegistry
 
class  TensorInfo
 
struct  TensorMemory
 
struct  TensorNumDimensionsAreCorrect
 
struct  TensorNumDimensionsAreGreaterOrEqualTo
 
class  TensorShape
 
struct  ThrowingStrategy
 
struct  TileDescriptor
 
class  TileLayer
 
struct  TileQueueDescriptor
 
class  TimeoutException
 
class  TosaRefBackend
 
class  TosaRefBaseWorkload
 
class  TosaRefLayerSupport
 
class  TosaRefMemoryManager
 
class  TosaRefPreCompiledWorkload
 
class  TosaRefTensorHandle
 
class  TosaRefTensorHandleFactory
 
class  TosaRefWorkloadFactory
 
class  TransformIterator
 
struct  TransposeConvolution2dDescriptor
 A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer. More...
 
class  TransposeConvolution2dLayer
 This layer represents a 2D transpose convolution operation. More...
 
struct  TransposeConvolution2dQueueDescriptor
 
struct  TransposeDescriptor
 A TransposeDescriptor for the TransposeLayer. More...
 
class  TransposeLayer
 This layer represents a transpose operation. More...
 
struct  TransposeQueueDescriptor
 
struct  TypeAnyOf
 
class  TypedIterator
 
class  TypedWorkload
 
struct  TypeIs
 
struct  TypeNotPerAxisQuantized
 
struct  TypesAreEqual
 
class  UnidirectionalSequenceLstmLayer
 This layer represents an LSTM operation. More...
 
struct  UnidirectionalSequenceLstmQueueDescriptor
 
class  UnimplementedException
 
class  UnmapLayer
 This layer represents a memory copy operation. More...
 
struct  UnmapQueueDescriptor
 
class  UnmapWorkload
 
struct  ViewsDescriptor
 A ViewsDescriptor for the SplitterLayer. More...
 
class  WallClockTimer
 
class  WorkloadDataCollector
 
class  WorkloadFactoryBase
 
struct  WorkloadInfo
 Contains information about TensorInfos of a layer. More...
 

Typedefs

using BackendIdVector = std::vector< BackendId >
 
using BackendIdSet = std::unordered_set< BackendId >
 
using NetworkOptions = std::vector< BackendOptions >
 
using ModelOptions = std::vector< BackendOptions >
 
using BackendCapabilities = BackendOptions
 
using IBackendInternalUniquePtr = std::unique_ptr< IBackendInternal >
 
using MemoryOptimizerStrategiesMapRef = std::unordered_map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > >
 
using DynamicBackendPtr = std::unique_ptr< DynamicBackend >
 
using IBackendContextUniquePtr = std::unique_ptr< IBackendContext >
 
using ILayerSupportSharedPtr = std::shared_ptr< ILayerSupport >
 
using IMemoryManagerUniquePtr = std::unique_ptr< IMemoryManager >
 
template<typename QueueDescriptor >
using FloatWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Uint8Workload = TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 >
 
template<typename QueueDescriptor >
using Int32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Signed32 >
 
template<typename QueueDescriptor >
using BooleanWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseFloat32ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseUint8ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BFloat16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToBFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16 >
 
template<typename QueueDescriptor >
using Float16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16 >
 
template<typename QueueDescriptor >
using Uint8ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32 >
 
using InputQueueDescriptor = MemCopyQueueDescriptor
 
using OutputQueueDescriptor = MemCopyQueueDescriptor
 
using MergerQueueDescriptor = ConcatQueueDescriptor
 
using LogSoftmaxDescriptor = SoftmaxDescriptor
 A LogSoftmaxDescriptor for the LogSoftmaxLayer. More...
 
using DepthToSpaceDescriptor = SpaceToDepthDescriptor
 A DepthToSpaceDescriptor for the DepthToSpaceLayer. More...
 
using UnidirectionalSequenceLstmDescriptor = LstmDescriptor
 
using ConcatDescriptor = OriginsDescriptor
 
using MergerDescriptor = OriginsDescriptor
 MergerDescriptor is deprecated, use ConcatDescriptor instead. More...
 
using SplitterDescriptor = ViewsDescriptor
 
using INetworkPtr = std::unique_ptr< INetwork, void(*)(INetwork *network)>
 
using IOptimizedNetworkPtr = std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>
 
using CompiledBlobDeleter = std::function< void(const void *)>
 
using CompiledBlobPtr = std::unique_ptr< void, CompiledBlobDeleter >
 
using NetworkId = int
 
using IRuntimePtr = std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>
 
using IGpuAccTunedParametersPtr = std::shared_ptr< IGpuAccTunedParameters >
 The following API is replaced by the backend options API. More...
 
using MemorySourceFlags = unsigned int
 
using BindingPointInfo = std::pair< armnn::LayerBindingId, armnn::TensorInfo >
 
using InputTensors = std::vector< std::pair< LayerBindingId, class ConstTensor > >
 
using OutputTensors = std::vector< std::pair< LayerBindingId, class Tensor > >
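
A minimal sketch of binding user buffers to network inputs and outputs for IRuntime::EnqueueWorkload ('runtime', 'networkId' and binding id 0 are assumed to come from a previously loaded network):

    #include <armnn/ArmNN.hpp>
    #include <vector>

    void RunInference(armnn::IRuntime& runtime, armnn::NetworkId networkId,
                      std::vector<float>& inputData, std::vector<float>& outputData)
    {
        armnn::TensorInfo inputInfo = runtime.GetInputTensorInfo(networkId, 0);
        inputInfo.SetConstant(true);                          // inputs are bound as ConstTensors
        armnn::TensorInfo outputInfo = runtime.GetOutputTensorInfo(networkId, 0);

        armnn::InputTensors  inputs  = { { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
        armnn::OutputTensors outputs = { { 0, armnn::Tensor(outputInfo, outputData.data()) } };

        runtime.EnqueueWorkload(networkId, inputs, outputs);
    }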
 
using IBackendSharedPtr = std::shared_ptr< IBackend >
 
using IBackendUniquePtr = std::unique_ptr< IBackend, void(*)(IBackend *backend)>
 
using LayerBindingId = int
 Type of identifiers for bindable layers (inputs, outputs). More...
 
using ImportedInputId = unsigned int
 
using ImportedOutputId = unsigned int
 
using DebugCallbackFunction = std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)>
 Define the type of callback for the Debug layer to call. More...
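
A minimal sketch of such a callback, registered through IRuntime::RegisterDebugCallback ('runtime' and 'networkId' are assumed, and the loaded network must contain Debug layers for the callback to fire):

    #include <armnn/IRuntime.hpp>
    #include <iostream>

    void AttachDebugCallback(armnn::IRuntime& runtime, armnn::NetworkId networkId)
    {
        armnn::DebugCallbackFunction callback =
            [](armnn::LayerGuid guid, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
            {
                std::cout << "layer " << guid << ", slot " << slotIndex << ": "
                          << tensorHandle->GetShape().GetNumElements() << " elements\n";
            };

        runtime.RegisterDebugCallback(networkId, callback);
    }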
 
using HighResolutionClock = std::chrono::high_resolution_clock::time_point
 Define a timer and associated inference ID for recording execution times. More...
 
using InferenceTimingPair = std::pair< HighResolutionClock, HighResolutionClock >
 
using TensorInfos = std::vector< TensorInfo >
 
using WorkloadQueue = std::vector< std::unique_ptr< IWorkload > >
 
using Coordinates = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using Dimensions = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using LayerPriority = unsigned int
 
using AdditionalInfoObjectPtr = std::shared_ptr< void >
 
using PreCompiledObjectDeleter = std::function< void(const void *)>
 
using PreCompiledObjectPtr = std::unique_ptr< void, PreCompiledObjectDeleter >
 
template<LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl< Type >::Type
 
using NetworkImplPtr = std::unique_ptr< NetworkImpl, void(*)(NetworkImpl *network)>
 
using BackendsMap = std::map< BackendId, std::unique_ptr< class IBackendInternal > >
 
template<DataType DT>
using ResolveType = typename ResolveTypeImpl< DT >::Type
 
using LoadedNetworks = std::unordered_map< NetworkId, std::unique_ptr< LoadedNetwork > >
 
using IReportStructure = arm::pipe::IReportStructure
 
using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService
 
using ParameterStringifyFunction = std::function< void(const std::string &name, const std::string &value)>
 
using FactoryId = ITensorHandleFactory::FactoryId
 
using Half = half_float::half
 
using CopyAndImportFactoryPairs = std::map< ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId >
 
using ACLMemManagerOnDemand = std::shared_ptr< arm_compute::MemoryManagerOnDemand >
 
using RefDebugBFloat16Workload = RefDebugWorkload< DataType::BFloat16 >
 
using RefDebugFloat16Workload = RefDebugWorkload< DataType::Float16 >
 
using RefDebugFloat32Workload = RefDebugWorkload< DataType::Float32 >
 
using RefDebugQAsymmU8Workload = RefDebugWorkload< DataType::QAsymmU8 >
 
using RefDebugQAsymmS8Workload = RefDebugWorkload< DataType::QAsymmS8 >
 
using RefDebugQSymmS16Workload = RefDebugWorkload< DataType::QSymmS16 >
 
using RefDebugQSymmS8Workload = RefDebugWorkload< DataType::QSymmS8 >
 
using RefDebugSigned32Workload = RefDebugWorkload< DataType::Signed32 >
 
using RefDebugSigned64Workload = RefDebugWorkload< DataType::Signed64 >
 
template<typename DataType = float>
using RefAdditionWorkload = RefElementwiseWorkload< std::plus< DataType >, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute >
 
template<typename DataType = float>
using RefSubtractionWorkload = RefElementwiseWorkload< std::minus< DataType >, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute >
 
template<typename DataType = float>
using RefMultiplicationWorkload = RefElementwiseWorkload< std::multiplies< DataType >, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute >
 
template<typename DataType = float>
using RefDivisionWorkload = RefElementwiseWorkload< std::divides< DataType >, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute >
 
template<typename DataType = float>
using RefMaximumWorkload = RefElementwiseWorkload< armnn::maximum< DataType >, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute >
 
template<typename DataType = float>
using RefMinimumWorkload = RefElementwiseWorkload< armnn::minimum< DataType >, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute >
 
using RefPermuteBFloat16Workload = RefPermuteWorkload< DataType::BFloat16 >
 
using RefPermuteFloat16Workload = RefPermuteWorkload< DataType::Float16 >
 
using RefPermuteFloat32Workload = RefPermuteWorkload< DataType::Float32 >
 
using RefPermuteQAsymmS8Workload = RefPermuteWorkload< DataType::QAsymmS8 >
 
using RefPermuteQAsymm8Workload = RefPermuteWorkload< DataType::QAsymmU8 >
 
using RefPermuteQSymm16Workload = RefPermuteWorkload< DataType::QSymmS16 >
 
using RefTransposeBFloat16Workload = RefTransposeWorkload< DataType::BFloat16 >
 
using RefTransposeFloat16Workload = RefTransposeWorkload< DataType::Float16 >
 
using RefTransposeFloat32Workload = RefTransposeWorkload< DataType::Float32 >
 
using RefTransposeQAsymmS8Workload = RefTransposeWorkload< DataType::QAsymmS8 >
 
using RefTransposeQAsymm8Workload = RefTransposeWorkload< DataType::QAsymmU8 >
 
using RefTransposeQSymm16Workload = RefTransposeWorkload< DataType::QSymmS16 >
 

Enumerations

enum  Compute { Undefined = 0, CpuRef = 1, CpuAcc = 2, GpuAcc = 3 }
 The Compute enum is deprecated and is being replaced by BackendId. More...
 
enum  CapabilityClass { PaddingRequired = 1, FallbackImportDisabled = 2, CapabilityClassMax = 254 }
 Capability class used by the GetCapabilities function so that only capabilities within scope are chosen for calculation. More...
 
enum  EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget }
 
enum  BoostLogSeverityMapping {
  trace, debug, info, warning,
  error, fatal
}
 
enum  Status { Success = 0, Failure = 1 }
 
enum  DataType {
  Float16 = 0, Float32 = 1, QAsymmU8 = 2, Signed32 = 3,
  Boolean = 4, QSymmS16 = 5, QSymmS8 = 6, QAsymmS8 = 7,
  BFloat16 = 8, Signed64 = 9
}
 
enum  DataLayout { NCHW = 1, NHWC = 2, NDHWC = 3, NCDHW = 4 }
 
enum  ProfilingDetailsMethod { Undefined = 0, DetailsWithEvents = 1, DetailsOnly = 2 }
 Define the behaviour of the internal profiler when outputting network details. More...
 
enum  QosExecPriority { Low = 0, Medium = 1, High = 2 }
 
enum  ActivationFunction {
  Sigmoid = 0, TanH = 1, Linear = 2, ReLu = 3,
  BoundedReLu = 4, SoftReLu = 5, LeakyReLu = 6, Abs = 7,
  Sqrt = 8, Square = 9, Elu = 10, HardSwish = 11,
  Gelu = 12
}
 
enum  ArgMinMaxFunction { Min = 0, Max = 1 }
 
enum  ComparisonOperation {
  Equal = 0, Greater = 1, GreaterOrEqual = 2, Less = 3,
  LessOrEqual = 4, NotEqual = 5
}
 
enum  LogicalBinaryOperation { LogicalAnd = 0, LogicalOr = 1 }
 
enum  UnaryOperation {
  Abs = 0, Exp = 1, Sqrt = 2, Rsqrt = 3,
  Neg = 4, LogicalNot = 5, Log = 6, Sin = 7,
  Ceil = 8
}
 
enum  BinaryOperation {
  Add = 0, Div = 1, Maximum = 2, Minimum = 3,
  Mul = 4, Sub = 5, SqDiff = 6, Power = 7
}
 
enum  PoolingAlgorithm { Max = 0, Average = 1, L2 = 2 }
 
enum  ReduceOperation {
  Sum = 0, Max = 1, Mean = 2, Min = 3,
  Prod = 4
}
 
enum  ResizeMethod { Bilinear = 0, NearestNeighbor = 1 }
 
enum  Dimensionality { NotSpecified = 0, Specified = 1, Scalar = 2 }
 
enum  PaddingMethod { IgnoreValue = 0, Exclude = 1 }
 The padding method modifies the output of pooling layers. More...
 
enum  PaddingMode { Constant = 0, Reflect = 1, Symmetric = 2 }
 The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect). More...
 
enum  NormalizationAlgorithmChannel { Across = 0, Within = 1 }
 
enum  NormalizationAlgorithmMethod { LocalBrightness = 0, LocalContrast = 1 }
 
enum  OutputShapeRounding { Floor = 0, Ceiling = 1 }
 
enum  ShapeInferenceMethod { ValidateOnly = 0, InferAndValidate = 1 }
 The ShapeInferenceMethod modifies how the output shapes are treated. More...
 
enum  MemorySource : uint32_t {
  Undefined = 0, Malloc = 1, DmaBuf = 2, DmaBufProtected = 4,
  Gralloc = 8
}
 Define the Memory Source to reduce copies. More...
 
enum  MemBlockStrategyType { SingleAxisPacking = 0, MultiAxisPacking = 1 }
 
enum  FusedKernelType { AddMulAdd = 0 }
 
enum  BackendCapability : uint32_t { NonConstWeights, AsyncExecution }
 BackendCapability class. More...
 
enum  LayerType {
  X, Activation, Addition, ArgMinMax,
  BatchNormalization, BatchToSpaceNd, Comparison, Concat,
  Constant, ConvertFp16ToFp32, ConvertFp32ToFp16, Convolution2d,
  Debug, DepthToSpace, DepthwiseConvolution2d, Dequantize,
  DetectionPostProcess, Division, ElementwiseUnary, FakeQuantization,
  Fill, Floor, FullyConnected, Gather,
  Input, InstanceNormalization, L2Normalization, LogicalBinary,
  LogSoftmax, Lstm, QLstm, Map,
  Maximum, Mean, MemCopy, MemImport,
  Merge, Minimum, Multiplication, Normalization,
  Output, Pad, Permute, Pooling2d,
  PreCompiled, Prelu, Quantize, QuantizedLstm,
  Reshape, Rank, Resize, Reduce,
  Slice, Softmax, SpaceToBatchNd, SpaceToDepth,
  Splitter, Stack, StandIn, StridedSlice,
  Subtraction, Switch, Transpose, TransposeConvolution2d,
  Unmap, Cast, Shape, UnidirectionalSequenceLstm,
  ChannelShuffle, Convolution3d, Pooling3d, GatherNd,
  BatchMatMul, ElementwiseBinary, ReverseV2, Tile,
  Fused, BroadcastTo, FirstLayer = Activation, LastLayer = BroadcastTo
}
 When adding a new layer, also adapt the LastLayer value in the LayerType enum class. More...
 
enum  LogSeverity {
  Trace, Debug, Info, Warning,
  Error, Fatal
}
 
enum  GraphEvent { LayerAdded, LayerErased }
 
enum  JsonObjectType { Measurement, Event, ExecObjectDesc }
 
enum  TuningLevel { None, Rapid, Normal, Exhaustive }
 

Functions

LayerSupportHandle GetILayerSupportByBackendId (const armnn::BackendId &backend)
 Convenience function to retrieve the LayerSupportHandle for a backend. More...
 
bool HasCapability (const std::string &name, const BackendCapabilities &capabilities)
 Convenience function to check if a capability exists in a BackendCapabilities struct. More...
 
bool HasCapability (const std::string &name, const armnn::BackendId &backend)
 Convenience function to check if a capability exists in a backend. More...
 
bool HasCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
bool HasMatchingCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasMatchingCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
 Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const armnn::BackendId &backend)
 Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted. More...
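
A minimal sketch of querying capabilities (the capability names used here are illustrative, not a guaranteed list):

    #include <armnn/BackendHelper.hpp>

    void QueryCapabilities()
    {
        // Simple boolean check against a named backend.
        bool asyncExecution = armnn::HasCapability("AsyncExecution", armnn::BackendId("CpuAcc"));

        // Retrieve the capability itself and inspect its value.
        armnn::Optional<const armnn::BackendOptions::BackendOption> cap =
            armnn::GetCapability("NumberOfCacheFiles", armnn::BackendId("GpuAcc"));
        if (cap.has_value() && cap.value().GetValue().IsInt())
        {
            int numCacheFiles = cap.value().GetValue().AsInt();   // only meaningful if the backend caches
        }
    }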
 
unsigned int GetNumberOfCacheFiles (const armnn::BackendId &backend)
 Returns the number of cached files if backend supports caching. More...
 
constexpr char const * GetComputeDeviceAsCString (Compute compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::vector< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::set< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const Compute &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const BackendId &id)
 
template<template< typename... > class TContainer, typename... TContainerTemplateArgs>
std::ostream & operator<< (std::ostream &os, const TContainer< BackendId, TContainerTemplateArgs... > &ids)
 
template<typename F >
void ParseOptions (const std::vector< BackendOptions > &options, BackendId backend, F f)
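
A minimal sketch of how a backend typically consumes its ModelOptions with ParseOptions (the option name and include path reflect a typical build and are assumptions): the callback runs once per option registered against the given BackendId.

    #include <armnn/BackendOptions.hpp>
    #include <string>
    #include <vector>

    bool ReadFastMathOption(const std::vector<armnn::BackendOptions>& modelOptions)
    {
        bool fastMathEnabled = false;
        armnn::ParseOptions(modelOptions, "GpuAcc",
            [&](const std::string& name, const armnn::BackendOptions::Var& value)
            {
                if (name == "FastMathEnabled" && value.IsBool())
                {
                    fastMathEnabled = value.AsBool();
                }
            });
        return fastMathEnabled;
    }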
 
bool ParseBooleanBackendOption (const armnn::BackendOptions::Var &value, bool defaultValue)
 
std::string ParseStringBackendOption (const armnn::BackendOptions::Var &value, std::string defaultValue)
 
int ParseIntBackendOption (const armnn::BackendOptions::Var &value, int defaultValue)
 
BackendRegistryBackendRegistryInstance ()
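
For example, the registry singleton can be used to list the backends compiled into the library (a sketch):

    #include <armnn/BackendRegistry.hpp>
    #include <iostream>

    void PrintRegisteredBackends()
    {
        for (const armnn::BackendId& id : armnn::BackendRegistryInstance().GetBackendIds())
        {
            std::cout << id.Get() << "\n";   // e.g. "CpuRef", "CpuAcc", "GpuAcc"
        }
    }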
 
std::ostream & operator<< (std::ostream &os, const BackendVersion &backendVersion)
 
TensorShape GetUnpaddedTensorStrides (const TensorInfo &tensorInfo)
 
DataType GetBiasDataType (DataType inputDataType)
 
template<typename TensorShapeIt >
OriginsDescriptor CreateDescriptorForConcatenation (TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
 Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors. More...
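
A minimal sketch (shapes are illustrative; 'network' is an INetwork being built) of concatenating two 2x3 tensors along dimension 0:

    #include <armnn/Descriptors.hpp>
    #include <armnn/INetwork.hpp>
    #include <vector>

    armnn::IConnectableLayer* AddConcat(armnn::INetwork& network)
    {
        std::vector<armnn::TensorShape> inputShapes = { armnn::TensorShape({ 2, 3 }),
                                                        armnn::TensorShape({ 2, 3 }) };

        armnn::OriginsDescriptor concatDesc =
            armnn::CreateDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(), 0);

        return network.AddConcatLayer(concatDesc, "concat");   // result shape: 4 x 3
    }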
 
template<typename ExceptionType >
void ConditionalThrow (bool condition, const std::string &message)
 
template<typename ExceptionType >
void ConditionalThrow (bool condition)
 
template<typename ExceptionType , typename ComparedType >
void ConditionalThrowIfNotEqual (const std::string &message, const ComparedType &leftHandSide, const ComparedType &rightHandSide)
 ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&). More...
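
 A brief sketch of the intended use; the rank check and InvalidArgumentException are chosen only as an example:

    #include <armnn/Exceptions.hpp>
    #include <armnn/Tensor.hpp>

    void CheckRankIsFour(const armnn::TensorInfo& info)
    {
        // Throws armnn::InvalidArgumentException if the two values differ;
        // both compared values are streamed into the exception message,
        // which is why operator<< is required.
        armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>(
            "Tensor rank mismatch", 4u, info.GetNumDimensions());
    }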
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Accept legacy OptimizerOptions. More...
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > & > messages=EmptyOptional())
 Accept legacy OptimizerOptions. More...
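
 A hedged end-to-end sketch using the OptimizerOptionsOpaque overload, assuming network is an already-populated INetworkPtr (the backend names are illustrative):

    #include <armnn/ArmNN.hpp>
    #include <string>
    #include <utility>
    #include <vector>

    void OptimizeAndLoad(armnn::INetworkPtr& network)
    {
        armnn::IRuntime::CreationOptions runtimeOptions;
        armnn::IRuntimePtr runtime = armnn::IRuntime::Create(runtimeOptions);

        std::vector<armnn::BackendId> backends = { "CpuAcc", "CpuRef" }; // preference order
        std::vector<std::string> messages;                               // optimizer warnings/errors

        armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
            *network,
            backends,
            runtime->GetDeviceSpec(),
            armnn::OptimizerOptionsOpaque(),
            armnn::Optional<std::vector<std::string>&>(messages));

        armnn::NetworkId networkId = 0;
        runtime->LoadNetwork(networkId, std::move(optNet));
    }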
 
std::string LevelToString (LogSeverity level)
 
LogSeverity StringToLogLevel (std::string level)
 
void SetLogFilter (LogSeverity level)
 
void SetAllLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
constexpr LogSeverity ConvertLogSeverity (BoostLogSeverityMapping severity)
 
template<typename Arg , typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg sourceA, Arg sourceB)
 
template<typename Arg , typename ... Args, typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg source, Args... rest)
 
bool CheckFlag (MemorySourceFlags flags, MemorySource source)
 
template<typename T , class... Args>
Optional< T > MakeOptional (Args &&... args)
 Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object. More...
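
 A minimal sketch:

    #include <armnn/Optional.hpp>
    #include <string>

    void MakeOptionalExample()
    {
        // Constructs the std::string in-place from (5, 'x'), i.e. "xxxxx".
        armnn::Optional<std::string> name = armnn::MakeOptional<std::string>(5, 'x');

        if (name.has_value())
        {
            const std::string& value = name.value();
            (void)value;
        }
    }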
 
const char * GetLayerTypeAsCString (LayerType type)
 
constexpr char const * GetStatusAsCString (Status status)
 
constexpr char const * GetActivationFunctionAsCString (ActivationFunction activation)
 
constexpr char const * GetArgMinMaxFunctionAsCString (ArgMinMaxFunction function)
 
constexpr char const * GetComparisonOperationAsCString (ComparisonOperation operation)
 
constexpr char const * GetBinaryOperationAsCString (BinaryOperation operation)
 
constexpr char const * GetUnaryOperationAsCString (UnaryOperation operation)
 
constexpr char const * GetLogicalBinaryOperationAsCString (LogicalBinaryOperation operation)
 
constexpr char const * GetFusedTypeAsCString (FusedKernelType type)
 
constexpr char const * GetPoolingAlgorithmAsCString (PoolingAlgorithm pooling)
 
constexpr char const * GetOutputShapeRoundingAsCString (OutputShapeRounding rounding)
 
constexpr char const * GetPaddingMethodAsCString (PaddingMethod method)
 
constexpr char const * GetPaddingModeAsCString (PaddingMode mode)
 
constexpr char const * GetReduceOperationAsCString (ReduceOperation reduce_operation)
 
constexpr unsigned int GetDataTypeSize (DataType dataType)
 
template<unsigned N>
constexpr bool StrEqual (const char *strA, const char(&strB)[N])
 
constexpr armnn::Compute ParseComputeDevice (const char *str)
 Deprecated function that will be removed together with the Compute enum. More...
 
constexpr const char * GetDataTypeName (DataType dataType)
 
constexpr const char * GetDataLayoutName (DataLayout dataLayout)
 
constexpr const char * GetNormalizationAlgorithmChannelAsCString (NormalizationAlgorithmChannel channel)
 
constexpr const char * GetNormalizationAlgorithmMethodAsCString (NormalizationAlgorithmMethod method)
 
constexpr const char * GetResizeMethodAsCString (ResizeMethod method)
 
constexpr const char * GetMemBlockStrategyTypeName (MemBlockStrategyType memBlockStrategyType)
 
template<typename T >
constexpr bool IsQuantizedType ()
 
constexpr bool IsQuantized8BitType (DataType dataType)
 
constexpr bool IsQuantizedType (DataType dataType)
 
std::ostream & operator<< (std::ostream &os, Status stat)
 
std::ostream & operator<< (std::ostream &os, const armnn::TensorShape &shape)
 
template<typename QuantizedType >
QuantizedType Quantize (float value, float scale, int32_t offset)
 Quantize a floating point data type into an 8-bit data type. More...
 
template<typename QuantizedType >
float Dequantize (QuantizedType value, float scale, int32_t offset)
 Dequantize an 8-bit data type into a floating point data type. More...
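
 A small round-trip sketch using asymmetric 8-bit quantization (the scale and offset values are illustrative):

    #include <armnn/TypesUtils.hpp>
    #include <cstdint>

    void QuantizeRoundTrip()
    {
        const float   scale  = 0.1f;
        const int32_t offset = 128;

        // round(1.5 / 0.1) + 128 = 143, clamped to the uint8_t range.
        uint8_t quantized = armnn::Quantize<uint8_t>(1.5f, scale, offset);

        // (143 - 128) * 0.1 = 1.5
        float dequantized = armnn::Dequantize(quantized, scale, offset);
        (void)dequantized;
    }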
 
void VerifyTensorInfoDataType (const armnn::TensorInfo &info, armnn::DataType dataType)
 
template<typename ... Ts>
void IgnoreUnused (Ts &&...)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Dest >::value &&std::is_integral< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_signed< Source >::value &&std::is_integral< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_floating_point< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename DestType , typename SourceType >
DestType PolymorphicDowncast (SourceType *value)
 Polymorphic downcast for built-in pointers only. More...
 
template<typename DestType , typename SourceType >
auto PolymorphicPointerDowncast (const SourceType &value)
 Polymorphic downcast for shared pointers and built-in pointers. More...
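
 A short sketch with a hypothetical Base/Derived pair; the downcast is verified with a dynamic_cast in debug builds:

    #include <armnn/utility/PolymorphicDowncast.hpp>
    #include <memory>

    struct Base    { virtual ~Base() = default; };
    struct Derived : Base {};

    void DowncastExamples()
    {
        Derived derived;
        Base* basePtr = &derived;

        // Built-in pointer downcast: the destination type is the pointer type.
        Derived* rawDerived = armnn::PolymorphicDowncast<Derived*>(basePtr);
        (void)rawDerived;

        // Shared-pointer downcast: the destination type is the pointee type.
        std::shared_ptr<Base> baseShared = std::make_shared<Derived>();
        std::shared_ptr<Derived> sharedDerived =
            armnn::PolymorphicPointerDowncast<Derived>(baseShared);
        (void)sharedDerived;
    }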
 
std::chrono::high_resolution_clock::time_point GetTimeNow ()
 
std::chrono::duration< double, std::milli > GetTimeDuration (std::chrono::high_resolution_clock::time_point start_time)
 
template<typename Function , typename Iterator >
constexpr TransformIterator< Function, Iterator > MakeTransformIterator (Iterator i, Function f)
 
void ConfigureLogging (bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
 Configures the logging behaviour of the ARMNN library. More...
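
 For example, to send Info-level (and more severe) messages to standard output only:

    #include <armnn/Utils.hpp>

    void EnableLogging()
    {
        armnn::ConfigureLogging(true  /* printToStandardOutput */,
                                false /* printToDebugOutput   */,
                                armnn::LogSeverity::Info);
    }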
 
bool NeonDetected ()
 
const std::string GetVersion ()
 
float roundf (float value)
 
void swap (OriginsDescriptor &first, OriginsDescriptor &second)
 
void swap (ViewsDescriptor &first, ViewsDescriptor &second)
 
uint32_t GetNumInputs (bool biasEnabled)
 
void AssertNumberOfInputSlots (Layer &layer)
 
template<typename T >
constexpr LayerType LayerEnumOf (const T *=nullptr)
 
template<>
constexpr LayerType LayerEnumOf (const ActivationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const AdditionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ArgMinMaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchMatMulLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchToSpaceNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BroadcastToLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const CastLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ChannelShuffleLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ComparisonLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConcatLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConstantLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToFp16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DebugLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthToSpaceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthwiseConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DequantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DetectionPostProcessLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DivisionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseUnaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FakeQuantizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FillLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FloorLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FullyConnectedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FusedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InstanceNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const L2NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogicalBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogSoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MapLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MaximumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MeanLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemCopyLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemImportLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MergeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MinimumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MultiplicationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const OutputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PadLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PermuteLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreCompiledLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreluLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizedLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const RankLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReduceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReshapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ResizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReverseV2Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ShapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToBatchNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToDepthLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SplitterLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StackLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StandInLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StridedSliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SubtractionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SwitchLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TileLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnidirectionalSequenceLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnmapLayer *)
 
template<typename T , typename V >
void SetValueChecked (Optional< T & > optionalRef, V &&val)
 
template<typename Float16Func , typename Float32Func , typename Uint8Func , typename Int32Func , typename BooleanFunc , typename ... Params>
bool IsSupportedForDataTypeGeneric (Optional< std::string & > reasonIfUnsupported, DataType dataType, Float16Func float16FuncPtr, Float32Func float32FuncPtr, Uint8Func uint8FuncPtr, Int32Func int32FuncPtr, BooleanFunc booleanFuncPtr, Params &&... params)
 
template<typename ... Params>
bool TrueFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFunc (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncU8 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncI32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF32 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF16 (Optional< std::string & > reasonIfUnsupported, Params &&... params)
 
void ValidateSourcesMatchOptimizedNetwork (std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
 This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization. More...
 
void CopyToOutputTensor (const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
 
const armnn::ConstTensor GetInputTensor (const LayerBindingId layerId, const InputTensors &inputTensors)
 
const armnn::Tensor GetOutputTensor (const LayerBindingId layerId, const OutputTensors &outputTensors)
 
template<LogSeverity Level>
void SetLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
void ReportError (const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)
 
void ReportWarning (const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)
 
OptimizationResult ReturnWithError (OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)
 
bool CheckScaleSetOnQuantizedType (Layer *layer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AttemptBackendAssignment (BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > errMessages)
 
std::vector< DataType > GetLayerInOutDatatype (const Layer *layer)
 
bool CheckFp16Support (BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)
 
void AssignBackendsIConnectable (OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView::IConnectableLayerIterator &firstLayer, SubgraphView::IConnectableLayerIterator &lastLayer, Optional< std::vector< std::string > & > errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > & > errMessages)
 
BackendsMap CreateSupportedBackends (TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
 
OptimizationResult ApplyBackendOptimizations (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)
 
bool RequiresCopy (ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOption (BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool exportEnabled)
 
EdgeStrategy CalculateEdgeStrategy (BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
OptimizationResult SelectTensorHandleStrategy (Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)
 
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< DebugLayer * > InsertDebugLayerAfter (Graph &graph, Layer &layer, bool toFile)
 
bool RevertConstantWeightsToFP32 (Layer *layer)
 
template<typename T >
void Append (Optimizer::Optimizations &optimizations, T &&optimization)
 
template<typename Front , typename... Others>
void Append (Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
 
template<typename... Args>
Optimizer::Optimizations MakeOptimizations (Args &&... args)
 
Measurement FindMeasurement (const std::string &name, const Event *event)
 
std::vector< Measurement > FindKernelMeasurements (const Event *event)
 
const Event * GetEventPtr (const Event *ptr)
 
const Event * GetEventPtr (const std::unique_ptr< Event > &ptr)
 
int CalcLevel (const Event *eventPtr)
 
void ConfigureDetailsObject (JsonChildObject &detailsObject, std::string layerDetailsStr)
 
void ExtractJsonObjects (unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event * >> descendantsMap)
 
template<typename DescriptorType >
void ProfilingUpdateDescriptions (const std::string &name, const DescriptorType &desc, const WorkloadInfo &infos, const arm::pipe::ProfilingGuid guid)
 
template<typename Delegate >
void ForEachLayerInput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
template<typename Delegate >
void ForEachLayerOutput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
void AssignSplitId (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsReadyForSplitAssignment (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsLayerSupported (const armnn::Layer *layer)
 
bool IsLayerSupported (const armnn::Layer &layer)
 
bool IsLayerOptimizable (const armnn::Layer *layer)
 
bool IsLayerOptimizable (const armnn::Layer &layer)
 
constexpr const char * MockTensorHandleFactoryId ()
 
Graph & GetGraphForTesting (IOptimizedNetwork *optNet)
 
ModelOptions & GetModelOptionsForTesting (IOptimizedNetwork *optNet)
 
arm::pipe::IProfilingService & GetProfilingService (armnn::RuntimeImpl *runtime)
 
std::ostream & operator<< (std::ostream &os, const BFloat16 &b)
 
template<typename LayerType >
LayerType * FuseLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
 
template<typename LayerType >
LayerType * FuseAdditionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseSubtractionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDivisionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseMultiplicationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseElementwiseBinaryLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, BinaryOperation operation, std::string name)
 
template<typename LayerType >
LayerType * FuseBatchNormalizationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDepthwiseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseFullyConnectedLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
std::vector< IConnectableLayer * > ChainReduceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
 
template<typename LayerType >
void ReplaceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer * > &layers)
 
template<typename LayerType >
void ReplaceMultipleLayers (OptimizationViews &optimizationViews, std::vector< IConnectableLayer * > &originalLayers, LayerType *baseLayer, const std::vector< SlotList > inputLayersSlotLists, const std::vector< SlotList > outputLayersSlotLists)
 
TuningLevel ParseTuningLevel (const BackendOptions::Var &value, TuningLevel defaultValue)
 
void ConfigureTuner (arm_compute::CLTuner &tuner, TuningLevel level)
 
arm_compute::NormalizationLayerInfo CreateAclNormalizationLayerInfoForL2Normalization (const armnn::TensorInfo &tensorInfo, armnn::DataLayout dataLayout)
 
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction (ActivationFunction armnnFunction)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor &actDesc)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor *activationDescPtr)
 
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo (const QueueDescriptor &queueDescriptor)
 
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo (uint32_t activationFunction)
 
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl (const ComparisonDescriptor &descriptor)
 
arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType (PoolingAlgorithm poolingAlgorithm)
 
arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType (OutputShapeRounding rounding)
 
arm_compute::NormType ConvertNormalizationAlgorithmChannelToAclNormType (NormalizationAlgorithmChannel channelType)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, arm_compute::ActivationLayerInfo activationLayerInfo)
 
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy (ResizeMethod resizeMethod)
 
template<typename T >
T ComputeSoftmaxAclAxis (const SoftmaxDescriptor &softmaxDesc, const armnn::TensorInfo &tensor)
 
std::set< unsigned int > ComputeSplitAxis (const armnn::SplitterDescriptor &desc, const TensorShape &input)
 
int ComputeAclAxis (const int &armnnAxis, const armnn::TensorInfo &tensor)
 Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank). More...
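
 This helper is declared in the ACL backend utilities inside the ArmNN source tree rather than the public armnn/ headers; a conceptual sketch for a non-negative axis:

    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>
    // armnn::ComputeAclAxis is assumed to be available via the backend header
    // aclCommon/ArmComputeUtils.hpp.

    int SoftmaxAxisForAcl()
    {
        // For a rank-4 tensor, ArmNN axis 1 (counted left to right) maps to
        // ACL axis rank - 1 - armnnAxis = 4 - 1 - 1 = 2 (counted right to left).
        armnn::TensorInfo info({1, 3, 224, 224}, armnn::DataType::Float32);
        return armnn::ComputeAclAxis(1, info);
    }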
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dQueueDescriptor queueDescriptor, bool isFastMathEnabled)
 
arm_compute::PaddingMode ConvertPaddingModeToAcl (const PaddingMode &paddingMode)
 
arm_compute::ReductionOperation ConvertReductionOperationToAcl (const ReduceDescriptor &descriptor)
 
const TensorInfo ComputeReductionTensorShape (const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
 Function to compute the output tensor shape based on the reduction axes and whether keepDims is set. More...
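
 A hedged illustration (the function is part of the ACL backend utilities; the shapes are only an example):

    #include <armnn/Tensor.hpp>
    #include <cstdint>
    #include <vector>
    // armnn::ComputeReductionTensorShape is assumed to be available via the backend
    // header aclCommon/ArmComputeUtils.hpp.

    armnn::TensorInfo ReducedShapeExample()
    {
        // Reduce a [1, 3, 4, 4] tensor over axes {2, 3}.
        armnn::TensorInfo input({1, 3, 4, 4}, armnn::DataType::Float32);
        std::vector<uint32_t> axes = {2, 3};

        // With keepDims == true the reduced dimensions are kept as size 1: [1, 3, 1, 1].
        // With keepDims == false they would be dropped instead: [1, 3].
        return armnn::ComputeReductionTensorShape(input, axes, true);
    }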
 
armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType (armnn::Optional< armnn::DataType > weightsType)
 
template<typename F >
bool CheckSupportRule (F rule, Optional< std::string & > reasonIfUnsupported, const char *reason)
 
template<typename T >
bool AllTypesAreEqualImpl (T)
 
template<typename T , typename... Rest>
bool AllTypesAreEqualImpl (T t1, T t2, Rest... rest)
 
std::unique_ptr< IMemoryOptimizerStrategy > GetMemoryOptimizerStrategy (const std::string &strategyName)
 
const std::vector< std::string > GetMemoryOptimizerStrategyNames ()
 
bool IsNCHW (armnn::Layer &layer)
 
void ReportUntouchedLayers (OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
 
template<typename LayerType >
LayerType * FoldPadLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, PadLayer *padLayer)
 
bool ConnectedToLayerWithNCHW (Layer *baseLayer)
 Checks if the Layer is connected to any Layer that has an NCHW layout. More...
 
bool ConnectedToLayerType (Layer *baseLayer, LayerType layerType, unsigned int dimSize=0)
 Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType. More...
 
void RemoveReshapeLayer (ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
 
template<typename LayerType >
LayerType * FoldPadIntoAveragePool2d (OptimizationViews &optimizationViews, Pooling2dLayer *baseLayer, Pooling2dDescriptor &poolDescriptor, PadLayer *padLayer)
 
bool IsSequenceLayerType (Layer &layer, LayerType type)
 
bool IsSequenceLayerType (Layer &layer, BinaryOperation type)
 
template<typename TYPE >
bool IsLayerSequence (Layer &currentLayer, TYPE first, TYPE second, TYPE third, Layer *layerList[4], bool handleValidActivates, const std::vector< ActivationFunction > &validActivates)
 
armnn::ConstTensor PermuteTensor (const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
 
void ReshapeWeightsForAcl (TensorInfo &weightInfo, DataLayout dataLayout)
 
template<typename DataType >
ConstTensor ReorderWeightChannelsForAcl (const ConstTensor &weightHandle, DataLayout dataLayout, void *permuteBuffer)
 
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl (const TensorInfo &weightInfo, DataLayout dataLayout)
 
std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
 Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. More...
 
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl (const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
 Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier. More...
 
std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
 Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]. More...
 
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl (const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
 
int32_t ConvertMaskToACLFormat (int32_t mask, int32_t numDim)
 
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices (TensorInfo inputInfo0, TensorInfo inputInfo1)
 Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1). More...
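
 A hedged illustration (backend-internal utility; the shapes are only an example):

    #include <armnn/Tensor.hpp>
    #include <map>
    #include <string>
    // armnn::CalculateGatherNdKeyIndices is assumed to be available via the backend
    // header backendsCommon/WorkloadUtils.hpp.

    std::map<std::string, unsigned int> GatherNdKeysExample()
    {
        // params of shape [5, 4, 3, 2] and indices of shape [2, 2]:
        // each index tuple has length 2, so ND == 2, and N is always 1.
        armnn::TensorInfo paramsInfo({5, 4, 3, 2}, armnn::DataType::Float32);
        armnn::TensorInfo indicesInfo({2, 2}, armnn::DataType::Signed32);

        // "K", "W" and "C" describe the flattened lookup dimensions derived
        // from the two shapes.
        return armnn::CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
    }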
 
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions (unsigned int rank)
 Generates a permutation vector of size rank that permutes the two right-most dimensions. More...
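
 For example (backend-internal utility):

    #include <armnn/Types.hpp>
    // armnn::GeneratePermutationVectorOnLastTwoDimensions is assumed to be available
    // via the backend header backendsCommon/WorkloadUtils.hpp.

    armnn::PermutationVector SwapLastTwoDims()
    {
        // For rank 4 this yields {0, 1, 3, 2}, i.e. the last two dimensions are swapped.
        return armnn::GeneratePermutationVectorOnLastTwoDimensions(4);
    }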
 
template<typename CopyFunc >
void CopyTensorContentsGeneric (const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
 
template<typename SrcTensorHandleType , typename DstTensorHandleType , typename DescriptorType >
void GatherTensorHandlePairs (const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType * >> &tensorHandlePairs)
 
constexpr const char * ClBackendId ()
 
flatbuffers::Offset< ClContext > CreateClContext (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
 
flatbuffers::Offset< ClContext > CreateClContextDirect (flatbuffers::FlatBufferBuilder &_fbb, const std::vector< flatbuffers::Offset< armnn::Program >> *programs=nullptr)
 
flatbuffers::Offset< Program > CreateProgram (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
 
flatbuffers::Offset< Program > CreateProgramDirect (flatbuffers::FlatBufferBuilder &_fbb, const char *name=nullptr, const std::vector< uint8_t > *binary=nullptr)
 
const armnn::ClContext * GetClContext (const void *buf)
 
const armnn::ClContext * GetSizePrefixedClContext (const void *buf)
 
const char * ClContextIdentifier ()
 
bool ClContextBufferHasIdentifier (const void *buf)
 
bool VerifyClContextBuffer (flatbuffers::Verifier &verifier)
 
bool VerifySizePrefixedClContextBuffer (flatbuffers::Verifier &verifier)
 
const char * ClContextExtension ()
 
void FinishClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
void FinishSizePrefixedClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
constexpr const char * ClImportTensorHandleFactoryId ()
 
constexpr const char * ClTensorHandleFactoryId ()
 
arm_compute::Status ClAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status ClAdditionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status ClBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status ClCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status ClComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status ClConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status ClConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status ClConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFloorWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status ClGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status ClInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status ClL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status ClLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status ClLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMeanValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status ClMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status ClPadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status ClPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status ClPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status ClPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status ClPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status ClQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status ClReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status ClReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)
 
arm_compute::Status ClRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status ClSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status ClSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status ClSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status ClSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status ClSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status ClStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status ClSubtractionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)
 
arm_compute::Status ClTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status ClTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
std::string GetConvolutionMethodString (arm_compute::ConvolutionMethod &convolutionMethod)
 
template<typename T >
void CopyArmComputeClTensorData (arm_compute::CLTensor &dstTensor, const T *srcData)
 
auto SetClStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetClSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
void InitializeArmComputeClTensorData (arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)
 
RuntimeException WrapClError (const cl::Error &clError, const CheckLocation &location)
 
void RunClFunction (arm_compute::IFunction &function, const CheckLocation &location)
 
template<typename DataType , typename PayloadType >
DataType * GetOutputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename T >
void DeleteAsType (const void *const blob)
 
SubgraphView::InputSlots CreateInputsFrom (Layer *layer)
 
SubgraphView::OutputSlots CreateOutputsFrom (Layer *layer)
 
SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom (SubgraphView::InputSlots &&inputs, SubgraphView::OutputSlots &&outputs, SubgraphView::Layers &&layers)
 
constexpr const char * GpuFsaBackendId ()
 
template<typename ... Args>
bool IsGpuFsaBackendSupported (Optional< std::string & > reasonIfUnsupported, Args... args)
 
constexpr const char * GpuFsaTensorHandleFactoryId ()
 
template<DataType ArmnnType>
bool IsDataType (const WorkloadInfo &info)
 
arm_compute::Status GpuFsaActivationValidate (const TensorInfo &input, const ActivationDescriptor &descriptor)
 
void GpuFsaActivationCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ActivationDescriptor &descriptor)
 
arm_compute::Status GpuFsaBatchMatMulValidate (const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
 
void GpuFsaBatchMatMulCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
 
arm_compute::Status GpuFsaCastValidate (const TensorInfo &input, const TensorInfo &output)
 
void GpuFsaCastCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status GpuFsaConvolution2dValidate (const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
void GpuFsaConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status GpuFsaDepthwiseConvolution2dValidate (const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
void GpuFsaDepthwiseConvolution2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status GpuFsaElementwiseBinaryValidate (const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
 
void GpuFsaElementwiseBinaryCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
 
arm_compute::Status GpuFsaPooling2dValidate (const TensorInfo &input, const Pooling2dDescriptor &descriptor)
 
void GpuFsaPooling2dCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status GpuFsaReshapeValidate (const TensorInfo &input, const ReshapeDescriptor &descriptor)
 
void GpuFsaReshapeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ReshapeDescriptor &descriptor)
 
arm_compute::Status GpuFsaResizeValidate (const TensorInfo &input, const ResizeDescriptor &descriptor)
 
void GpuFsaResizeCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ResizeDescriptor &descriptor)
 
arm_compute::Status GpuFsaSoftmaxValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
void GpuFsaSoftmaxCreateOp (GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status GpuFsaConstantWorkloadValidate (const TensorInfo &output)
 
bool GpuFsaPreCompiledWorkloadValidate (std::string *reasonIfUnsupported)
 
constexpr const char * NeonBackendId ()
 
bool CollapseLeadingUnitDimensions (const TensorInfo &in, TensorInfo &out)
 
template<typename SlotListType >
void BuildAddMulAddSlotLists (bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
 
void GetFusedName (Layer *layerList[4], std::string &fusedName)
 
template<typename Type >
bool BuildAddMulAddTensorInfoLists (Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
 
bool IsLayerTypeSupported (const LayerType &type, const std::vector< TensorInfo > &infos, const BaseDescriptor &descriptor, const Optional< LstmInputParamsInfo > &lstmParamsInfo, const Optional< QuantizedLstmInputParamsInfo > &quantizedLstmParamsInfo, Optional< std::string & > reasonIfUnsupported, const NeonLayerSupport &support)
 
constexpr const char * NeonTensorHandleFactoryId ()
 
arm_compute::Status NeonAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status NeonAdditionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status NeonBatchMatMulValidate (const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status NeonCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status NeonComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status NeonConcatWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status NeonConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::DetectionPostProcessLayerInfo MakeInfo (const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDetectionPostProcessValidate (const TensorInfo &boxEncodings, const TensorInfo &scores, const TensorInfo &anchors, const TensorInfo &detectionBoxes, const TensorInfo &detectionClasses, const TensorInfo &detectionScores, const TensorInfo &numDetections, const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonElementwiseBinaryWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonFusedWorkloadValidate (const std::vector< std::reference_wrapper< TensorInfo >> &inputInfos, const std::vector< std::reference_wrapper< TensorInfo >> &outputInfos, const FusedDescriptor &fusedDescriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status NeonGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status NeonInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status NeonL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonMeanWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status NeonMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 Validates the inputs and output. More...
 
arm_compute::Status NeonMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonPadWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status NeonPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status NeonPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status NeonPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status NeonPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status NeonQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status NeonReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status NeonReverseV2WorkloadValidate (const TensorInfo &input, const TensorInfo &axis, const TensorInfo &output)
 
arm_compute::Status NeonRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status NeonSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status NeonSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status NeonSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonStackWorkloadValidate (const std::vector< const TensorInfo * > &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status NeonStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status NeonSubtractionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonTileWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TileDescriptor &descriptor)
 
arm_compute::Status NeonTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status NeonTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
template<typename T >
void CopyArmComputeTensorData (arm_compute::Tensor &dstTensor, const T *srcData)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, TensorInfo tensorInfo, const ITensorHandle *handle)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, const ConstTensorHandle *handle)
 
auto SetNeonStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetNeonSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
constexpr const char * RefBackendId ()
 
constexpr const char * RefTensorHandleFactoryId ()
 
bool IsSigned64 (const WorkloadInfo &info)
 
bool IsSigned32 (const WorkloadInfo &info)
 
bool IsBFloat16 (const WorkloadInfo &info)
 
bool IsFloat16 (const WorkloadInfo &info)
 
bool IsQSymmS16 (const WorkloadInfo &info)
 
bool IsQSymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmU8 (const WorkloadInfo &info)
 
template<typename QueueDescriptorType >
constexpr bool IsOperationQueueDescriptor (const QueueDescriptorType &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const MemCopyQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const ConstantQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const PermuteQueueDescriptor &)
 
float Activation (float in, ActivationFunction function, float a, float b)
 
void Activation (Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
 
template<typename OUT >
void ArgMinMax (Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int32_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int64_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
void BatchNormImpl (const BatchNormalizationQueueDescriptor &data, Decoder< float > &meanDecoder, Decoder< float > &varianceDecoder, Decoder< float > &betaDecoder, Decoder< float > &gammaDecoder, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
unsigned int Offset (const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
 
void BatchToSpaceNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const BatchToSpaceNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Concatenate (const ConcatQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
void Convolve3d (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int paddingFront, unsigned int xStride, unsigned int yStride, unsigned int zStride, unsigned int xDilation, unsigned int yDilation, unsigned int zDilation)
 
void Convolve (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
 
template<typename T >
void PrintOutput (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, std::ostream &os)
 
template<typename T >
void Debug (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< BFloat16 > (const TensorInfo &inputInfo, const BFloat16 *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< Half > (const TensorInfo &inputInfo, const Half *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< float > (const TensorInfo &inputInfo, const float *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< uint8_t > (const TensorInfo &inputInfo, const uint8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int8_t > (const TensorInfo &inputInfo, const int8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int16_t > (const TensorInfo &inputInfo, const int16_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int32_t > (const TensorInfo &inputInfo, const int32_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template void Debug< int64_t > (const TensorInfo &inputInfo, const int64_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
 
template<typename T >
std::unique_ptr< Decoder< T > > MakeDecoder (const TensorInfo &info, const void *data=nullptr)
 
template<>
std::unique_ptr< Decoder< float > > MakeDecoder (const TensorInfo &info, const void *data)
 
void DepthToSpace (const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Dequantize (Decoder< float > &inputDecoder, Encoder< float > &outputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
std::vector< unsigned int > GenerateRangeK (unsigned int k)
 
void TopKSort (unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
 
float IntersectionOverUnion (const float *boxI, const float *boxJ)
 
std::vector< unsigned int > NonMaxSuppression (unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
 
void AllocateOutputData (unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
 
void DetectionPostProcess (const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &anchorsInfo, const TensorInfo &detectionBoxesInfo, const TensorInfo &detectionClassesInfo, const TensorInfo &detectionScoresInfo, const TensorInfo &numDetectionsInfo, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
 
template<typename T >
std::unique_ptr< Encoder< T > > MakeEncoder (const TensorInfo &info, void *data=nullptr)
 
template<>
std::unique_ptr< Encoder< float > > MakeEncoder (const TensorInfo &info, void *data)
 
void Fill (Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
 Creates a tensor and fills it with a scalar value. More...
 
void FullyConnected (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *rBiasDecoder, bool biasEnabled, unsigned int K, bool transposeWeights)
 Performs a matrix multiplication and optionally adds a bias. More...
 
void Gather (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis_int)
 
void InstanceNorm (const InstanceNormalizationQueueDescriptor &data, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void LogSoftmax (Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
 
void LstmImpl (const LstmDescriptor &descriptor, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const TensorShape &inputToOutputWeightsShape, const TensorShape &recurrentToOutputWeightsShape, std::unique_ptr< Decoder< float >> &inputData, std::unique_ptr< Decoder< float >> &outputStateIn, std::unique_ptr< Decoder< float >> &cellStateIn, std::unique_ptr< Encoder< float >> &outputStateOut, std::unique_ptr< Encoder< float >> &cellStateOut, std::unique_ptr< Encoder< float >> &output, std::unique_ptr< Decoder< float >> &cellStateOutDecoder, std::unique_ptr< Decoder< float >> &outputDecoder, std::unique_ptr< Decoder< float >> &inputToInputWeightsTensor, std::unique_ptr< Decoder< float >> &inputToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &inputToCellWeightsTensor, std::unique_ptr< Decoder< float >> &inputToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToInputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToCellWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToInputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &cellToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &inputGateBiasTensor, std::unique_ptr< Decoder< float >> &forgetGateBiasTensor, std::unique_ptr< Decoder< float >> &cellBiasTensor, std::unique_ptr< Decoder< float >> &outputGateBiasTensor, std::unique_ptr< Decoder< float >> &projectionWeightsTensor, std::unique_ptr< Decoder< float >> &projectionBiasTensor, std::unique_ptr< Decoder< float >> &inputLayerNormWeights, std::unique_ptr< Decoder< float >> &forgetLayerNormWeights, std::unique_ptr< Decoder< float >> &cellLayerNormWeights, std::unique_ptr< Decoder< float >> &outputLayerNormWeights, std::unique_ptr< Encoder< float >> &inputGateScratch, std::unique_ptr< Encoder< float >> &cellScratch, std::unique_ptr< Encoder< float >> &forgetGateScratch, std::unique_ptr< Encoder< float >> &outputGateScratch, std::unique_ptr< Decoder< float >> &inputGateScratchDecoder, std::unique_ptr< Decoder< float >> &cellScratchDecoder, std::unique_ptr< Decoder< float >> &forgetGateScratchDecoder, std::unique_ptr< Decoder< float >> &outputGateScratchDecoder, float layerNormEpsilon)
 
void MirrorPad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pooling2d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
 Computes the Pooling2d operation. More...
 
void Pooling3d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
 Computes the Pooling3d operation. More...
 
void PreluImpl (const TensorInfo &inputInfo, const TensorInfo &alphaInfo, const TensorInfo &outputInfo, Decoder< float > &inputData, Decoder< float > &alphaData, Encoder< float > &outputData)
 
bool NextIndex (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
 
unsigned int ReducedOutputOffset (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
 
void Reduce (const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
 
template<typename DataType >
void ExecuteFunction (std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs, BinaryOperation operation)
 
void FakeQuantization (const float *inputData, float *outputData, uint32_t numElements, float min, float max)
 
unsigned int GetNumActivations (const TensorInfo &inputInfo)
 
template<typename TensorHandleType = RefTensorHandle>
const TensorInfoGetTensorInfo (const ITensorHandle *tensorHandle)
 float32 helpers More...
 
template<typename DataType , typename PayloadType >
const DataTypeGetInputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename DataType >
DataTypeGetOutputTensorData (ITensorHandle *tensorHandle)
 
template<typename PayloadType >
const float * GetInputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
float * GetOutputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const HalfGetInputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
HalfGetOutputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const BFloat16GetInputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
BFloat16GetOutputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename T >
std::vector< float > Dequantize (const T *quant, const TensorInfo &info)
 u8 helpers More...
 
template<typename T >
void Dequantize (const T *inputData, float *outputData, const TensorInfo &info)
 
void Quantize (uint8_t *quant, const float *dequant, const TensorInfo &info)
 
void Resize (Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
 
std::vector< unsigned int > ReverseGetMultIdx (const unsigned int idx, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
 
unsigned int ReverseGetFlatIdx (const std::vector< unsigned int > &idxList, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
 
unsigned int ReverseRelocateIdx (unsigned int idx, unsigned int inputRank, std::vector< bool > &axisFlag, std::vector< unsigned int > &dimSize, std::vector< unsigned int > &elementNumInner)
 
void ReverseV2 (const TensorInfo &inputInfo, const TensorInfo &axisInfo, Decoder< float > &inputDecoder, Decoder< int > &axisDecoder, Encoder< float > &outputEncoder)
 
void Slice (const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Softmax (Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
 Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. More...
 
unsigned int GetOffset (const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
 
void SpaceToBatchNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void SpaceToDepth (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Split (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
template<typename DataType >
void Splitter (const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
 
void Stack (const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
void StridedSlice (const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
std::vector< uint32_t > IndexToCoordinates (std::vector< uint32_t > &shape, uint32_t index)
 
uint32_t CoordinatesToIndex (TensorShape &shape, std::vector< uint32_t > &coordinates)
 
void Tile (const TileDescriptor &params, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void TransposeConvolution2dImpl (const TransposeConvolution2dDescriptor &descriptor, const TensorShape &inputShape, Decoder< float > &inputDecoder, const TensorShape &outputShape, Encoder< float > &outputEncoder, const TensorShape &weightsShape, Decoder< float > &weightsDecoder, Decoder< float > *biasesDecoder)
 
constexpr const char * TosaRefBackendId ()
 
constexpr const char * TosaRefTensorHandleFactoryId ()
 
bool TosaRefPreCompiledWorkloadValidate (std::string *)
 

Variables

constexpr unsigned int MaxNumOfTensorDimensions = 5U
 
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
 The lowest performance data capture interval we support is 10 milliseconds. More...
 
constexpr unsigned int EXPIRE_RATE = 3U
 Variable to control expire rate of priority queue. More...
 
constexpr std::size_t g_ProfilingEventCountHint = 1024
 
constexpr bool g_WriteProfilingEventSequence = true
 
constexpr bool g_AggregateProfilingEventsByInference = true
 
constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
 
thread_local IProfilertl_Profiler = nullptr
 
constexpr size_t wordSize = sizeof(size_t) * 8
 
const BackendCapabilities gpuFsaCapabilities ("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
 
const BackendCapabilities cpuAccCapabilities ("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })
 
const std::set< armnn::LayerTypepaddingRequiredLayers
 
const BackendCapabilities cpuRefCapabilities ("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })
 
const std::set< armnn::BackendCapabilityoldCpuRefCapabilities
 

Detailed Description

Copyright (c) 2021 ARM Limited and Contributors.

Optional is a drop-in replacement for std::optional until we migrate to C++17.


Only the subset of the std::optional features that we intend to use in ArmNN is implemented. There are two distinct implementations here:

1. for normal constructable/destructable types and reference types
2. for reference types

The std::optional features we support are:

  • has_value() and operator bool() to tell if the optional has a value
  • value() returns a reference to the held object

Typedef Documentation

◆ ACLMemManagerOnDemand

using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>

Definition at line 22 of file NeonFullyConnectedWorkload.cpp.

◆ AdditionalInfoObjectPtr

using AdditionalInfoObjectPtr = std::shared_ptr<void>

Definition at line 228 of file Layer.hpp.

◆ BackendCapabilities

Definition at line 19 of file BackendOptions.hpp.

◆ BackendIdSet

using BackendIdSet = std::unordered_set<BackendId>

Definition at line 193 of file BackendId.hpp.

◆ BackendIdVector

using BackendIdVector = std::vector<BackendId>

Definition at line 192 of file BackendId.hpp.

◆ BackendsMap

using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal> >

Definition at line 282 of file Network.hpp.

◆ BaseFloat32ComparisonWorkload

◆ BaseUint8ComparisonWorkload

◆ BFloat16ToFloat32Workload

◆ BindingPointInfo

Definition at line 276 of file Tensor.hpp.

◆ BooleanWorkload

◆ CompiledBlobDeleter

typedef std::function< void(const void *)> CompiledBlobDeleter

Definition at line 342 of file INetwork.hpp.

◆ CompiledBlobPtr

typedef std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr

Definition at line 343 of file INetwork.hpp.

◆ ConcatDescriptor

Definition at line 59 of file DescriptorsFwd.hpp.

◆ Coordinates

using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 15 of file InternalTypes.hpp.

◆ CopyAndImportFactoryPairs

◆ DebugCallbackFunction

using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>

Define the type of callback for the Debug layer to call.

Parameters
guid- guid of layer connected to the input of the Debug layer
slotIndex- index of the output slot connected to the input of the Debug layer
tensorHandle- TensorHandle for the input tensor to the Debug layer

Definition at line 398 of file Types.hpp.
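
As a hedged sketch of how this callback type can be used (registering the callback with the runtime, for example via IRuntime::RegisterDebugCallback, is assumed and not shown):

#include <armnn/Types.hpp>
#include <iostream>

// A minimal sketch of a DebugCallbackFunction: it only reports which output slot
// produced the intermediate tensor and whether a tensor handle was supplied.
armnn::DebugCallbackFunction MakeLoggingDebugCallback()
{
    return [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
    {
        std::cout << "Debug layer fired for output slot " << slotIndex
                  << (tensorHandle != nullptr ? " (tensor handle available)" : " (no tensor handle)")
                  << std::endl;
    };
}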

◆ DepthToSpaceDescriptor

A DepthToSpaceDescriptor for the DepthToSpaceLayer.

Definition at line 1099 of file Descriptors.hpp.

◆ Dimensions

using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 16 of file InternalTypes.hpp.

◆ DynamicBackendPtr

using DynamicBackendPtr = std::unique_ptr<DynamicBackend>

Definition at line 54 of file DynamicBackend.hpp.

◆ FactoryId

◆ Float16ToFloat32Workload

◆ Float32ToBFloat16Workload

◆ Float32ToFloat16Workload

◆ Float32Workload

◆ FloatWorkload

◆ Half

typedef half_float::half Half

Definition at line 22 of file Half.hpp.

◆ HighResolutionClock

using HighResolutionClock = std::chrono::high_resolution_clock::time_point

Define a timer and associated inference ID for recording execution times.

Definition at line 401 of file Types.hpp.

◆ IBackendContextUniquePtr

using IBackendContextUniquePtr = std::unique_ptr<IBackendContext>

Definition at line 34 of file IBackendContext.hpp.

◆ IBackendInternalUniquePtr

typedef std::unique_ptr< IBackendInternal > IBackendInternalUniquePtr

Definition at line 32 of file BackendRegistry.hpp.

◆ IBackendSharedPtr

using IBackendSharedPtr = std::shared_ptr<IBackend>

Definition at line 282 of file Types.hpp.

◆ IBackendUniquePtr

using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>

Definition at line 283 of file Types.hpp.

◆ IGpuAccTunedParametersPtr

The following API is replaced by the backend options API.

Definition at line 300 of file IRuntime.hpp.

◆ IInitialiseProfilingService

using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

Definition at line 28 of file Runtime.hpp.

◆ ILayerSupportSharedPtr

using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>

Definition at line 40 of file ILayerSupport.hpp.

◆ IMemoryManagerUniquePtr

using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>

Definition at line 24 of file IMemoryManager.hpp.

◆ ImportedInputId

using ImportedInputId = unsigned int

Definition at line 310 of file Types.hpp.

◆ ImportedOutputId

using ImportedOutputId = unsigned int

Definition at line 311 of file Types.hpp.

◆ INetworkPtr

using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>

Definition at line 339 of file INetwork.hpp.

◆ InferenceTimingPair

Definition at line 402 of file Types.hpp.

◆ InputQueueDescriptor

Definition at line 91 of file WorkloadData.hpp.

◆ InputTensors

using InputTensors = std::vector<std::pair<LayerBindingId, class ConstTensor> >

Definition at line 394 of file Tensor.hpp.

◆ Int32Workload

◆ IOptimizedNetworkPtr

using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>

Definition at line 340 of file INetwork.hpp.

◆ IReportStructure

using IReportStructure = arm::pipe::IReportStructure

Definition at line 27 of file Runtime.hpp.

◆ IRuntimePtr

using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>

Definition at line 41 of file IRuntime.hpp.
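
These smart-pointer aliases are typically seen together when building and running a network. The sketch below is illustrative only (layer construction and error handling are elided) and assumes the public ArmNN headers and the CpuRef backend are available:

#include <armnn/ArmNN.hpp>
#include <utility>
#include <vector>

void MinimalRuntimeSketch()
{
    // Create the runtime that owns backends and loaded networks.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    // Build a network (layers elided here) and optimize it for the preferred backends.
    armnn::INetworkPtr network = armnn::INetwork::Create();
    // ... add input, hidden and output layers to 'network' ...

    std::vector<armnn::BackendId> preferredBackends = { armnn::Compute::CpuRef };
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, preferredBackends, runtime->GetDeviceSpec());

    // Load the optimized network; networkId is later used for EnqueueWorkload.
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
}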

◆ LayerBindingId

using LayerBindingId = int

Type of identifiers for bindable layers (inputs, outputs).

Definition at line 309 of file Types.hpp.

◆ LayerPriority

using LayerPriority = unsigned int

Definition at line 227 of file Layer.hpp.

◆ LayerTypeOf

using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type

Definition at line 94 of file LayersFwd.hpp.

◆ LoadedNetworks

using LoadedNetworks = std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork> >

Definition at line 26 of file Runtime.hpp.

◆ LogSoftmaxDescriptor

A LogSoftmaxDescriptor for the LogSoftmaxLayer.

Definition at line 196 of file Descriptors.hpp.

◆ MemoryOptimizerStrategiesMapRef

using MemoryOptimizerStrategiesMapRef = std::unordered_map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy> >

Definition at line 33 of file BackendRegistry.hpp.

◆ MemorySourceFlags

using MemorySourceFlags = unsigned int

Definition at line 15 of file MemorySources.hpp.

◆ MergerDescriptor

MergerDescriptor is deprecated; use ConcatDescriptor instead.

Definition at line 63 of file DescriptorsFwd.hpp.

◆ MergerQueueDescriptor

Definition at line 149 of file WorkloadData.hpp.

◆ ModelOptions

using ModelOptions = std::vector<BackendOptions>

Definition at line 18 of file BackendOptions.hpp.
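
A minimal sketch of populating ModelOptions. The "FastMathEnabled" option name is an example and is only meaningful to backends that understand it:

#include <armnn/BackendOptions.hpp>

// Backend-specific options are grouped per backend id and collected into ModelOptions.
armnn::BackendOptions cpuAcc("CpuAcc", { { "FastMathEnabled", true } });
armnn::ModelOptions modelOptions{ cpuAcc };
// 'modelOptions' can then be handed to the code path that accepts a ModelOptions argument.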

◆ NetworkId

typedef int NetworkId

Definition at line 35 of file IRuntime.hpp.

◆ NetworkImplPtr

using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>

Definition at line 29 of file Network.hpp.

◆ NetworkOptions

using NetworkOptions = std::vector<BackendOptions>

Definition at line 16 of file BackendOptions.hpp.

◆ OutputQueueDescriptor

Definition at line 92 of file WorkloadData.hpp.

◆ OutputTensors

using OutputTensors = std::vector<std::pair<LayerBindingId, class Tensor> >

Definition at line 395 of file Tensor.hpp.
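
A minimal sketch of filling the InputTensors and OutputTensors containers before running inference. The binding ids and TensorInfo objects are assumed to come from elsewhere, for example a parser's binding information:

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <utility>
#include <vector>

// A sketch only: builds the two containers from binding ids and tensor infos that the
// calling code is assumed to have obtained elsewhere.
std::pair<armnn::InputTensors, armnn::OutputTensors>
MakeIoTensors(armnn::LayerBindingId inputBindingId,  armnn::TensorInfo inputInfo,
              armnn::LayerBindingId outputBindingId, const armnn::TensorInfo& outputInfo,
              std::vector<float>& inputData, std::vector<float>& outputData)
{
    inputInfo.SetConstant(true); // ConstTensor expects a TensorInfo marked as constant

    armnn::InputTensors inputTensors
    {
        { inputBindingId, armnn::ConstTensor(inputInfo, inputData.data()) }
    };
    armnn::OutputTensors outputTensors
    {
        { outputBindingId, armnn::Tensor(outputInfo, outputData.data()) }
    };
    return { std::move(inputTensors), std::move(outputTensors) };
}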

◆ ParameterStringifyFunction

using ParameterStringifyFunction = std::function<void(const std::string& name, const std::string& value)>

Definition at line 14 of file SerializeLayerParameters.hpp.

◆ PreCompiledObjectDeleter

using PreCompiledObjectDeleter = std::function<void(const void*)>

Definition at line 19 of file PreCompiledLayer.hpp.

◆ PreCompiledObjectPtr

using PreCompiledObjectPtr = std::unique_ptr<void, PreCompiledObjectDeleter>

Definition at line 20 of file PreCompiledLayer.hpp.

◆ RefAdditionWorkload

◆ RefDebugBFloat16Workload

◆ RefDebugFloat16Workload

◆ RefDebugFloat32Workload

◆ RefDebugQAsymmS8Workload

◆ RefDebugQAsymmU8Workload

◆ RefDebugQSymmS16Workload

◆ RefDebugQSymmS8Workload

◆ RefDebugSigned32Workload

◆ RefDebugSigned64Workload

◆ RefDivisionWorkload

◆ RefMaximumWorkload

◆ RefMinimumWorkload

◆ RefMultiplicationWorkload

◆ RefPermuteBFloat16Workload

◆ RefPermuteFloat16Workload

◆ RefPermuteFloat32Workload

◆ RefPermuteQAsymm8Workload

◆ RefPermuteQAsymmS8Workload

◆ RefPermuteQSymm16Workload

◆ RefSubtractionWorkload

◆ RefTransposeBFloat16Workload

◆ RefTransposeFloat16Workload

◆ RefTransposeFloat32Workload

◆ RefTransposeQAsymm8Workload

◆ RefTransposeQAsymmS8Workload

◆ RefTransposeQSymm16Workload

◆ ResolveType

using ResolveType = typename ResolveTypeImpl<DT>::Type

Definition at line 79 of file ResolveType.hpp.

◆ SplitterDescriptor

Definition at line 64 of file DescriptorsFwd.hpp.

◆ TensorInfos

using TensorInfos = std::vector<TensorInfo>

Definition at line 152 of file BackendHelper.cpp.

◆ Uint8ToFloat32Workload

◆ Uint8Workload

◆ UnidirectionalSequenceLstmDescriptor

◆ WorkloadQueue

using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >

Definition at line 13 of file ExecutionFrame.hpp.

Enumeration Type Documentation

◆ ActivationFunction

enum ActivationFunction
strong
Enumerator
Sigmoid 
TanH 
Linear 
ReLu 
BoundedReLu 

min(a, max(b, input)) ReLu1 & ReLu6.

SoftReLu 
LeakyReLu 
Abs 
Sqrt 
Square 
Elu 
HardSwish 
Gelu 

Definition at line 86 of file Types.hpp.

87 {
88  Sigmoid = 0,
89  TanH = 1,
90  Linear = 2,
91  ReLu = 3,
92  BoundedReLu = 4, ///< min(a, max(b, input)) ReLu1 & ReLu6.
93  SoftReLu = 5,
94  LeakyReLu = 6,
95  Abs = 7,
96  Sqrt = 8,
97  Square = 9,
98  Elu = 10,
99  HardSwish = 11,
100  Gelu = 12
101 };

◆ ArgMinMaxFunction

enum ArgMinMaxFunction
strong
Enumerator
Min 
Max 

Definition at line 103 of file Types.hpp.

104 {
105  Min = 0,
106  Max = 1
107 };

◆ BackendCapability

enum BackendCapability : uint32_t
strong

BackendCapability class.

Enumerator
NonConstWeights 

Constant weights can be accessed through the descriptors; non-const weights, on the other hand, can be accessed through inputs.

AsyncExecution 

Asynchronous Execution.

Definition at line 286 of file Types.hpp.

286  : uint32_t
287 {
288  /// Constant weights can be accessed through the descriptors,
289  /// On the other hand, non-const weights can be accessed through inputs.
290  NonConstWeights,
291 
292  /// Asynchronous Execution.
293  AsyncExecution,
294 
295  // add new enum values here
296 };

◆ BinaryOperation

enum BinaryOperation
strong
Enumerator
Add 
Div 
Maximum 
Minimum 
Mul 
Sub 
SqDiff 
Power 

Definition at line 138 of file Types.hpp.

139 {
140  Add = 0,
141  Div = 1,
142  Maximum = 2,
143  Minimum = 3,
144  Mul = 4,
145  Sub = 5,
146  SqDiff = 6,
147  Power = 7
148 };
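
A minimal sketch of how a BinaryOperation value is typically consumed when building a network; it assumes an INetwork under construction and the AddElementwiseBinaryLayer entry point of the public INetwork interface:

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>

// A sketch only: adds an element-wise Add layer to a network under construction.
armnn::IConnectableLayer* AddElementwiseAdd(armnn::INetwork& network)
{
    armnn::ElementwiseBinaryDescriptor descriptor(armnn::BinaryOperation::Add);
    return network.AddElementwiseBinaryLayer(descriptor, "elementwise_add");
}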

◆ BoostLogSeverityMapping

Enumerator
trace 
debug 
info 
warning 
error 
fatal 

Definition at line 196 of file Logging.hpp.

197 {
198  trace,
199  debug,
200  info,
201  warning,
202  error,
203  fatal
204 };

◆ CapabilityClass

enum CapabilityClass
strong

Capability class used by the GetCapabilities function so that only the capabilities within the requested scope are calculated.

Enumerator
PaddingRequired 
FallbackImportDisabled 
CapabilityClassMax 

Definition at line 24 of file ITensorHandleFactory.hpp.

25 {
26  PaddingRequired = 1,
27  FallbackImportDisabled,
28 
29  // add new enum values here
30 
31  CapabilityClassMax = 254
32 };

◆ ComparisonOperation

enum ComparisonOperation
strong
Enumerator
Equal 
Greater 
GreaterOrEqual 
Less 
LessOrEqual 
NotEqual 

Definition at line 109 of file Types.hpp.

110 {
111  Equal = 0,
112  Greater = 1,
113  GreaterOrEqual = 2,
114  Less = 3,
115  LessOrEqual = 4,
116  NotEqual = 5
117 };

◆ Compute

enum Compute
strong

The Compute enum is deprecated and is being replaced by BackendId.

Enumerator
Undefined 
CpuRef 

CPU Execution: Reference C++ kernels.

CpuAcc 

CPU Execution: NEON: ArmCompute.

GpuAcc 

GPU Execution: OpenCL: ArmCompute.

Definition at line 21 of file BackendId.hpp.

22 {
23  Undefined = 0,
24  /// CPU Execution: Reference C++ kernels
25  CpuRef = 1,
26  /// CPU Execution: NEON: ArmCompute
27  CpuAcc = 2,
28  /// GPU Execution: OpenCL: ArmCompute
29  GpuAcc = 3
30 };

◆ DataLayout

enum DataLayout
strong
Enumerator
NCHW 
NHWC 
NDHWC 
NCDHW 

Definition at line 62 of file Types.hpp.

63 {
64  NCHW = 1,
65  NHWC = 2,
66  NDHWC = 3,
67  NCDHW = 4
68 };

◆ DataType

enum DataType
strong
Enumerator
Float16 
Float32 
QAsymmU8 
Signed32 
Boolean 
QSymmS16 
QSymmS8 
QAsymmS8 
BFloat16 
Signed64 

Definition at line 48 of file Types.hpp.

49 {
50  Float16 = 0,
51  Float32 = 1,
52  QAsymmU8 = 2,
53  Signed32 = 3,
54  Boolean = 4,
55  QSymmS16 = 5,
56  QSymmS8 = 6,
57  QAsymmS8 = 7,
58  BFloat16 = 8,
59  Signed64 = 9,
60 };
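
A minimal sketch of how DataType values are used when describing tensors; the shapes and the quantisation scale/offset below are illustrative only:

#include <armnn/Tensor.hpp>

// A float tensor and an 8-bit asymmetric quantised tensor (scale/offset chosen arbitrarily).
armnn::TensorInfo floatInfo(armnn::TensorShape({ 1, 3, 224, 224 }), armnn::DataType::Float32);
armnn::TensorInfo quantInfo(armnn::TensorShape({ 1, 8 }), armnn::DataType::QAsymmU8,
                            /*quantizationScale=*/0.1f, /*quantizationOffset=*/128);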

◆ Dimensionality

enum Dimensionality
strong
Enumerator
NotSpecified 
Specified 
Scalar 

Definition at line 172 of file Types.hpp.

173 {
174  NotSpecified = 0,
175  Specified = 1,
176  Scalar = 2
177 };

◆ EdgeStrategy

enum EdgeStrategy
strong
Enumerator
Undefined 

No strategy has been defined. Used internally to verify integrity of optimizations.

DirectCompatibility 

Destination backend can work directly with tensors on source backend.

ExportToTarget 

Source backend's tensor data can be exported to the destination backend tensor without a copy.

CopyToTarget 

Copy contents from source backend tensor to destination backend tensor.

Definition at line 104 of file ITensorHandleFactory.hpp.

105 {
106  Undefined, /// No strategy has been defined. Used internally to verify integrity of optimizations.
107  DirectCompatibility, /// Destination backend can work directly with tensors on source backend.
108  ExportToTarget, /// Source backends tensor data can be exported to destination backend tensor without copy.
109  CopyToTarget /// Copy contents from source backend tensor to destination backend tensor.
110 };

◆ FusedKernelType

enum FusedKernelType
strong
Enumerator
AddMulAdd 

Definition at line 266 of file Types.hpp.

267 {
268  AddMulAdd = 0
269 };

◆ GraphEvent

enum GraphEvent
strong
Enumerator
LayerAdded 
LayerErased 

Definition at line 12 of file IGraphObservable.hpp.

13 {
14  LayerAdded,
15  LayerErased
16 };

◆ JsonObjectType

enum JsonObjectType
strong
Enumerator
Measurement 
Event 
ExecObjectDesc 

Definition at line 20 of file JsonPrinter.hpp.

21 {
22  Measurement,
23  Event,
24  ExecObjectDesc
25 };

◆ LayerType

enum LayerType
strong

When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.

Enumerator
Activation 
Addition 
ArgMinMax 
BatchNormalization 
BatchToSpaceNd 
Comparison 
Concat 
Constant 
ConvertFp16ToFp32 
ConvertFp32ToFp16 
Convolution2d 
Debug 
DepthToSpace 
DepthwiseConvolution2d 
Dequantize 
DetectionPostProcess 
Division 
ElementwiseUnary 
FakeQuantization 
Fill 
Floor 
FullyConnected 
Gather 
Input 
InstanceNormalization 
L2Normalization 
LogicalBinary 
LogSoftmax 
Lstm 
QLstm 
Map 
Maximum 
Mean 
MemCopy 
MemImport 
Merge 
Minimum 
Multiplication 
Normalization 
Output 
Pad 
Permute 
Pooling2d 
PreCompiled 
Prelu 
Quantize 
QuantizedLstm 
Reshape 
Rank 
Resize 
Reduce 
Slice 
Softmax 
SpaceToBatchNd 
SpaceToDepth 
Splitter 
Stack 
StandIn 
StridedSlice 
Subtraction 
Switch 
Transpose 
TransposeConvolution2d 
Unmap 
Cast 
Shape 
UnidirectionalSequenceLstm 
ChannelShuffle 
Convolution3d 
Pooling3d 
GatherNd 
BatchMatMul 
ElementwiseBinary 
ReverseV2 
Tile 
Fused 
BroadcastTo 
FirstLayer 
LastLayer 

Definition at line 491 of file Types.hpp.

492 {
493 #define X(name) name,
495 #undef X
498 };

◆ LogicalBinaryOperation

Enumerator
LogicalAnd 
LogicalOr 

Definition at line 119 of file Types.hpp.

120 {
121  LogicalAnd = 0,
122  LogicalOr = 1
123 };

◆ LogSeverity

enum LogSeverity
strong
Enumerator
Trace 
Debug 
Info 
Warning 
Error 
Fatal 

Definition at line 13 of file Utils.hpp.

14 {
15  Trace,
16  Debug,
17  Info,
18  Warning,
19  Error,
20  Fatal
21 };
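
A minimal sketch of selecting a severity threshold, assuming the armnn::ConfigureLogging helper from the public headers:

#include <armnn/Utils.hpp>

// Route armnn log messages of Warning severity and above to standard output.
void EnableWarningLogging()
{
    armnn::ConfigureLogging(/*printToStandardOutput=*/true,
                            /*printToDebugOutput=*/false,
                            armnn::LogSeverity::Warning);
}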

◆ MemBlockStrategyType

enum MemBlockStrategyType
strong
Enumerator
SingleAxisPacking 
MultiAxisPacking 

Definition at line 253 of file Types.hpp.

254 {
255  // MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
256  // In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
257  // equivalent to blob or pooling memory management.
258  SingleAxisPacking = 0,
259 
260  // MemBlocks can be packed on either Y or X axis but cannot overlap on both.
261  // In other words MemBlocks with overlapping lifetimes can use the same MemBin,
262  // equivalent to offset or slab memory management.
263  MultiAxisPacking = 1
264 };

◆ MemorySource

enum MemorySource : uint32_t
strong

Define the Memory Source to reduce copies.

Enumerator
Undefined 
Malloc 
DmaBuf 
DmaBufProtected 
Gralloc 

Definition at line 244 of file Types.hpp.

244  : uint32_t
245 {
246  Undefined = 0,
247  Malloc = 1,
248  DmaBuf = 2,
249  DmaBufProtected = 4,
250  Gralloc = 8
251 };
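
Since the enumerator values are single-bit flags, several sources can be folded into one MemorySourceFlags mask; a minimal sketch:

#include <armnn/MemorySources.hpp>

// Accept imported buffers that are either plain malloc'd memory or dma-buf backed.
armnn::MemorySourceFlags importFlags =
    static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc) |
    static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::DmaBuf);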

◆ NormalizationAlgorithmChannel

Enumerator
Across 
Within 

Definition at line 207 of file Types.hpp.

208 {
209  Across = 0,
210  Within = 1
211 };

◆ NormalizationAlgorithmMethod

Enumerator
LocalBrightness 

Krichevsky 2012: Local Brightness Normalization.

LocalContrast 

Jarret 2009: Local Contrast Normalization.

Definition at line 213 of file Types.hpp.

214 {
215  /// Krichevsky 2012: Local Brightness Normalization
216  LocalBrightness = 0,
217  /// Jarret 2009: Local Contrast Normalization
218  LocalContrast = 1
219 };

◆ OutputShapeRounding

enum OutputShapeRounding
strong
Enumerator
Floor 
Ceiling 

Definition at line 221 of file Types.hpp.

222 {
223  Floor = 0,
224  Ceiling = 1
225 };

◆ PaddingMethod

enum PaddingMethod
strong

The padding method modifies the output of pooling layers.

In both supported methods, the padding values are ignored (they are not even zeroes, which would make a difference when max pooling a tensor with negative values). The difference between IgnoreValue and Exclude is that the former counts the padding fields in the divisor of Average and L2 pooling, while Exclude does not.

Enumerator
IgnoreValue 

The padding fields count, but are ignored.

Exclude 

The padding fields don't count and are ignored.

Definition at line 188 of file Types.hpp.

189 {
190  /// The padding fields count, but are ignored
191  IgnoreValue = 0,
192  /// The padding fields don't count and are ignored
193  Exclude = 1
194 };

◆ PaddingMode

enum PaddingMode
strong

The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).

Enumerator
Constant 
Reflect 
Symmetric 

Definition at line 200 of file Types.hpp.

201 {
202  Constant = 0,
203  Reflect = 1,
204  Symmetric = 2
205 };

◆ PoolingAlgorithm

enum PoolingAlgorithm
strong
Enumerator
Max 
Average 
L2 

Definition at line 150 of file Types.hpp.

151 {
152  Max = 0,
153  Average = 1,
154  L2 = 2
155 };
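
A minimal sketch of a descriptor that selects one of these algorithms; the member names follow Pooling2dDescriptor and the concrete sizes are illustrative:

#include <armnn/Descriptors.hpp>

// 2x2 max pooling with stride 2, NHWC data, padding excluded from the divisor.
armnn::Pooling2dDescriptor MakeMaxPool2x2()
{
    armnn::Pooling2dDescriptor pool;
    pool.m_PoolType      = armnn::PoolingAlgorithm::Max;
    pool.m_PoolWidth     = 2;
    pool.m_PoolHeight    = 2;
    pool.m_StrideX       = 2;
    pool.m_StrideY       = 2;
    pool.m_PaddingMethod = armnn::PaddingMethod::Exclude;
    pool.m_DataLayout    = armnn::DataLayout::NHWC;
    return pool;
}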

◆ ProfilingDetailsMethod

Define the behaviour of the internal profiler when outputting network details.

Enumerator
Undefined 
DetailsWithEvents 
DetailsOnly 

Definition at line 71 of file Types.hpp.

72 {
73  Undefined = 0,
74  DetailsWithEvents = 1,
75  DetailsOnly = 2
76 };

◆ QosExecPriority

enum QosExecPriority
strong
Enumerator
Low 
Medium 
High 

Definition at line 79 of file Types.hpp.

80 {
81  Low = 0,
82  Medium = 1,
83  High = 2
84 };

◆ ReduceOperation

enum ReduceOperation
strong
Enumerator
Sum 
Max 
Mean 
Min 
Prod 

Definition at line 157 of file Types.hpp.

158 {
159  Sum = 0,
160  Max = 1,
161  Mean = 2,
162  Min = 3,
163  Prod = 4
164 };

◆ ResizeMethod

enum ResizeMethod
strong
Enumerator
Bilinear 
NearestNeighbor 

Definition at line 166 of file Types.hpp.

167 {
168  Bilinear = 0,
169  NearestNeighbor = 1
170 };

◆ ShapeInferenceMethod

enum ShapeInferenceMethod
strong

The ShapeInferenceMethod modifies how the output shapes are treated.

When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer and any mismatch is reported. When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or dimension sizes are not specified explicitly, although this information can be calculated from the inputs.

Enumerator
ValidateOnly 

Validate all output shapes.

InferAndValidate 

Infer missing output shapes and validate all output shapes.

Definition at line 235 of file Types.hpp.

236 {
237  /// Validate all output shapes
238  ValidateOnly = 0,
239  /// Infer missing output shapes and validate all output shapes
240  InferAndValidate = 1
241 };

◆ Status

enum Status
strong

enumeration

Enumerator
Success 
Failure 

Definition at line 42 of file Types.hpp.

43 {
44  Success = 0,
45  Failure = 1
46 };
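
Most runtime entry points report this Status; a minimal sketch of checking it, assuming a runtime, a loaded network id and the tensor containers built in the earlier examples already exist:

// A sketch only: 'runtime', 'networkId', 'inputTensors' and 'outputTensors' are assumed
// to have been set up as in the IRuntimePtr and InputTensors examples above.
armnn::Status status = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
if (status != armnn::Status::Success)
{
    // Handle the failure, e.g. log an error and discard the results of this inference.
}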

◆ TuningLevel

enum TuningLevel
strong
Enumerator
None 
Rapid 
Normal 
Exhaustive 

Definition at line 18 of file ArmComputeTuningUtils.hpp.

19 {
20  None,
21  Rapid,
22  Normal,
23  Exhaustive
24 };

◆ UnaryOperation

enum UnaryOperation
strong
Enumerator
Abs 
Exp 
Sqrt 
Rsqrt 
Neg 
LogicalNot 
Log 
Sin 
Ceil 

Definition at line 125 of file Types.hpp.

126 {
127  Abs = 0,
128  Exp = 1,
129  Sqrt = 2,
130  Rsqrt = 3,
131  Neg = 4,
132  LogicalNot = 5,
133  Log = 6,
134  Sin = 7,
135  Ceil = 8
136 };

Function Documentation

◆ Activation() [1/2]

void Activation ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo tensorInfo,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 102 of file Activation.cpp.

108 {
109  unsigned int numElements = tensorInfo.GetNumElements();
110 
111  for (unsigned int i = 0; i < numElements; i++)
112  {
113  out.Set(Activation(in.Get(), function, a, b));
114  ++in;
115  ++out;
116  }
117  in -= numElements;
118  out -= numElements;
119 }

References Activation(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().

◆ Activation() [2/2]

float Activation ( float  in,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 13 of file Activation.cpp.

17 {
18  float output;
19 
20  // Compute the result of the activation function.
21  switch (function)
22  {
23  case ActivationFunction::Linear:
24  {
25  output = a * in + b;
26  break;
27  }
28  case ActivationFunction::Sigmoid:
29  {
30  output = 1.f / (1.f + expf(-in));
31  break;
32  }
33  case ActivationFunction::ReLu:
34  {
35  output = std::max(0.f, in);
36  break;
37  }
38  case ActivationFunction::BoundedReLu:
39  {
40  output = std::min(a, std::max(b, in));
41  break;
42  }
43  case ActivationFunction::SoftReLu:
44  {
45  output = logf(1.0f + expf(in));
46  break;
47  }
48  case ActivationFunction::LeakyReLu:
49  {
50  output = in > 0.0f ? in : (in * a);
51  break;
52  }
53  case ActivationFunction::Abs:
54  {
55  output = in < 0 ? -in : in;
56  break;
57  }
58  case ActivationFunction::Sqrt:
59  {
60  output = sqrtf(in);
61  break;
62  }
63  case ActivationFunction::Square:
64  {
65  output = in * in;
66  break;
67  }
68  case ActivationFunction::TanH:
69  {
70  output = a * tanhf(b * in);
71  break;
72  }
73  case ActivationFunction::Elu:
74  {
75  output = (in >= 0) ? in : a * (expf(in) - 1);
76  break;
77  }
78  case ActivationFunction::HardSwish:
79  {
80  // hard_swish(x) = x * relu6(x+3) / 6
81  // relu6(x) = min(max(x,0),6)
82  output = in * (std::min(std::max((in + 3),0.0f),6.0f)) / 6;
83  break;
84  }
85  case ActivationFunction::Gelu:
86  {
87  // gelu(x) = x * 1/2 * (1 + erf(x / sqrt(2))),
88  // where erf is Gaussian error function
89  output = in * (0.5f * (1.0f + erff(static_cast<float>(in / std::sqrt(2)))));
90  break;
91  }
92  default:
93  {
94  throw InvalidArgumentException("Unsupported activation function");
95  }
96  }
97 
98  return output;
99 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by Activation(), and LstmImpl().
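
This helper lives in the reference backend workloads rather than the public API; treating it as available, a small usage sketch:

// BoundedReLu computes min(a, max(b, in)), i.e. it clamps the input to [b, a].
float clamped  = armnn::Activation(-2.5f, armnn::ActivationFunction::BoundedReLu,
                                   /*a=*/6.0f, /*b=*/0.0f);   // -> 0.0f
// Sigmoid ignores a and b: 1 / (1 + e^-x).
float squashed = armnn::Activation(0.5f, armnn::ActivationFunction::Sigmoid,
                                   /*a=*/0.0f, /*b=*/0.0f);   // -> ~0.62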

◆ AllocateOutputData()

void armnn::AllocateOutputData ( unsigned int  numOutput,
unsigned int  numSelected,
const std::vector< float > &  boxCorners,
const std::vector< unsigned int > &  outputIndices,
const std::vector< unsigned int > &  selectedBoxes,
const std::vector< unsigned int > &  selectedClasses,
const std::vector< float > &  selectedScores,
float *  detectionBoxes,
float *  detectionScores,
float *  detectionClasses,
float *  numDetections 
)

Definition at line 103 of file DetectionPostProcess.cpp.

114 {
115  for (unsigned int i = 0; i < numOutput; ++i)
116  {
117  unsigned int boxIndex = i * 4;
118  if (i < numSelected)
119  {
120  unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
121  detectionScores[i] = selectedScores[outputIndices[i]];
122  detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
123  detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
124  detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
125  detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
126  detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
127  }
128  else
129  {
130  detectionScores[i] = 0.0f;
131  detectionClasses[i] = 0.0f;
132  detectionBoxes[boxIndex] = 0.0f;
133  detectionBoxes[boxIndex + 1] = 0.0f;
134  detectionBoxes[boxIndex + 2] = 0.0f;
135  detectionBoxes[boxIndex + 3] = 0.0f;
136  }
137  }
138  numDetections[0] = armnn::numeric_cast<float>(numSelected);
139 }

Referenced by DetectionPostProcess().

◆ AllTypesAreEqualImpl() [1/2]

bool armnn::AllTypesAreEqualImpl ( T  t1,
T  t2,
Rest...  rest 
)

Definition at line 65 of file LayerSupportRules.hpp.

66 {
67  static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
68 
69  return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
70 }

References AllTypesAreEqualImpl().

◆ AllTypesAreEqualImpl() [2/2]

bool armnn::AllTypesAreEqualImpl ( )

Definition at line 59 of file LayerSupportRules.hpp.

60 {
61  return true;
62 }

Referenced by AllTypesAreEqualImpl(), and TypesAreEqual::TypesAreEqual().

◆ Append() [1/2]

void armnn::Append ( Optimizer::Optimizations optimizations,
Front &&  front,
Others &&...  others 
)

Definition at line 36 of file Optimizer.hpp.

37 {
38  Append<Front>(optimizations, std::forward<Front>(front));
39  Append<Others...>(optimizations, std::forward<Others>(others)...);
40 };

References Append().

◆ Append() [2/2]

void armnn::Append ( Optimizer::Optimizations optimizations,
T &&  optimization 
)

Definition at line 30 of file Optimizer.hpp.

31 {
32  optimizations.emplace_back(new T(optimization));
33 };

Referenced by Append(), and MakeOptimizations().

◆ ApplyBackendOptimizations()

OptimizationResult armnn::ApplyBackendOptimizations ( OptimizedNetworkImpl optNetObjPtr,
BackendSettings backendSettings,
BackendsMap backends,
const ModelOptions modelOptions,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1328 of file Network.cpp.

1333 {
1334  ARMNN_ASSERT(optNetObjPtr);
1335  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
1336  OptimizationResult result;
1337 
1338  // Get the optimized graph
1339  Graph& optGraph = optNetObjPtr->GetGraph();
1340 
1341  // Run backend specific optimizations
1342  for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
1343  {
1344  auto backendObjPtr = backends.find(selectedBackend)->second.get();
1345  ARMNN_ASSERT(backendObjPtr);
1346 
1347  if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
1348  {
1350  Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
1351  }
1352 
1353  // Select sub-graphs based on backend
1354  SubgraphViewSelector::Subgraphs subgraphs =
1355  SubgraphViewSelector::SelectSubgraphs(optGraph,
1356  // Select layers assigned to the requested backend
1357  [&backendObjPtr](const Layer& layer)
1358  {
1359 
1360  return layer.GetType() != LayerType::Input &&
1361  layer.GetType() != LayerType::Output &&
1362  layer.GetBackendId() == backendObjPtr->GetId();
1363  });
1364  if (subgraphs.empty())
1365  {
1366  // No sub-graphs found, try with next selected backend
1367  continue;
1368  }
1369 
1370  // Try to optimize each sub-graph
1371  for (auto& subgraph : subgraphs)
1372  {
1373  // Try to optimize the current sub-graph
1374  ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
1375  OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
1376  ARMNN_ASSERT(optimizationViews.Validate(*subgraph));
1377 
1378  // Optimization attempted, check the resulting optimized sub-graph
1379  for (auto& substitution : optimizationViews.GetSubstitutions())
1380  {
1381  // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
1382  SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
1383  SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
1384  optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
1385 
1386  // Assign the current backend to the optimized sub-graph
1387  const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
1388  std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
1389  {
1390  ARMNN_ASSERT(l);
1391  PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
1392  });
1393  }
1394 
1395  // Remove deleted sub-graphs
1396  for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
1397  {
1398  for (auto& l : deletedSubgraph.GetIConnectableLayers())
1399  {
1400  Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
1401  for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
1402  {
1403  auto inputSlot = deletedLayer->GetInputSlot(in -1);
1404  OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
1405  parentOut->Disconnect(inputSlot);
1406  for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out)
1407  {
1408  InputSlot* childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1);
1409  deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn);
1410  parentOut->Connect(*childIn);
1411  }
1412  }
1413  optGraph.EraseLayer(deletedLayer);
1414  }
1415  }
1416 
1417  if (!optimizationViews.GetFailedSubgraphs().empty())
1418  {
1419  std::stringstream warningMsg;
1420  warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
1421  ReportWarning(warningMsg.str(), errMessages);
1422 
1423  // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
1424  BackendSettings settingsCopy(backendSettings);
1425  if (!backendObjPtr->GetId().IsCpuRef())
1426  {
1427  // Add the current backend to the list of backends to ignore
1428  settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
1429  }
1430 
1431  int count=0;
1432  for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
1433  {
1434  // An error occurred: the optimization was attempted but not performed, try different backends
1435  std::stringstream subgraphMsg;
1436  subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
1437  << " layers inside sub-graph " << count++;
1438  ReportWarning(subgraphMsg.str(), errMessages);
1439 
1440  OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
1441  settingsCopy,
1442  *subgraph,
1443  errMessages);
1444  if (reassignmentResult.m_Error)
1445  {
1446  // Failed to re-assign one of the remaining backends to each layer of the sub-graph
1447  result.m_Error = true;
1448  return result;
1449  }
1450  }
1451  }
1452  }
1453  }
1454 
1455  return result;
1456 }

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), OutputSlot::Connect(), CpuAcc, OutputSlot::Disconnect(), Graph::EraseLayer(), Layer::GetBackendId(), OutputSlot::GetConnection(), OptimizationViews::GetDeletedSubgraphs(), OptimizationViews::GetFailedSubgraphs(), OptimizedNetworkImpl::GetGraph(), SubgraphView::GetIConnectableLayers(), Layer::GetInputSlot(), OutputSlot::GetNumConnections(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OptimizationViews::GetSubstitutions(), Layer::GetType(), GpuAcc, Input, OptimizationResult::m_Error, BackendSettings::m_IgnoredBackends, BackendSettings::m_SelectedBackends, MakeOptimizations(), Output, Optimizer::Pass(), ReportWarning(), SubgraphViewSelector::SelectSubgraphs(), Graph::SubstituteSubgraph(), Undefined, and OptimizationViews::Validate().

Referenced by Optimize().

◆ ArgMinMax() [1/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int32_t *  out,
const TensorInfo inputTensorInfo,
const TensorInfo outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [2/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int64_t *  out,
const TensorInfo inputTensorInfo,
const TensorInfo outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [3/3]

void ArgMinMax ( Decoder< float > &  in,
OUT *  out,
const TensorInfo inputTensorInfo,
const TensorInfo outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

Definition at line 17 of file ArgMinMax.cpp.

19 {
20  IgnoreUnused(outputTensorInfo);
21 
22  unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
23 
24  const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
25  const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
26  const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
27  uAxis + 1,
28  inputTensorInfo.GetNumDimensions());
29 
30  for (unsigned int outer = 0; outer < outerElements; ++outer) {
31  for (unsigned int inner = 0; inner < innerElements; ++inner) {
32  in[outer * axisSize * innerElements + inner];
33  auto tmpValue = in.Get();
34  unsigned int tmpIndex = 0;
35  for (unsigned int i = 1; i < axisSize; ++i) {
36  in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
37  const auto& value = in.Get();
38  if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
39  (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) {
40  tmpValue = value;
41  tmpIndex = i;
42  }
43  }
44 
45  out[outer * innerElements + inner] = armnn::numeric_cast<OUT>(tmpIndex);
46  }
47  }
48 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), armnnUtils::GetUnsignedAxis(), IgnoreUnused(), Max, and Min.
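
As a worked illustration (hypothetical shapes, not taken from the sources): for an input of shape [2, 3, 4] with axis = 1, GetUnsignedAxis() gives uAxis = 1, so outerElements = 2, axisSize = 3 and innerElements = 4. The loops above therefore compare 3 values for each of the 2 * 4 = 8 (outer, inner) pairs and write 8 indices, each in the range [0, 2], into an output of shape [2, 4].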

◆ AssertNumberOfInputSlots()

void armnn::AssertNumberOfInputSlots ( Layer layer)

Definition at line 28 of file Layer.cpp.

29 {
30  switch (layer.GetType())
31  {
32  case LayerType::Convolution2d:
33  case LayerType::DepthwiseConvolution2d:
34  case LayerType::FullyConnected:
35  {
36  ARMNN_ASSERT(layer.GetNumInputSlots() == 2 ||
37  layer.GetNumInputSlots() == 3);
38  break;
39  }
40  default:
41  {
42  ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
43  break;
44  }
45  }
46 }

References ARMNN_ASSERT, Convolution2d, DepthwiseConvolution2d, FullyConnected, Layer::GetNumInputSlots(), and Layer::GetType().

Referenced by InputSlot::Insert().

◆ AssignBackends() [1/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl optNetObjPtr,
BackendSettings backendSettings,
Graph::Iterator firstLayer,
Graph::Iterator lastLayer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1186 of file Network.cpp.

1191 {
1192  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1193  OptimizationResult result;
1194 
1195  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1196  if (availablePreferredBackends.empty())
1197  {
1198  std::stringstream failureMsg;
1199  failureMsg << "No preferred backends are available";
1200  ReportError(failureMsg.str(), errMessages);
1201 
1202  result.m_Error = true;
1203  return result;
1204  }
1205 
1206  for (auto it = firstLayer; it != lastLayer; ++it)
1207  {
1208  auto layer = PolymorphicDowncast<Layer*>(*it);
1209  std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1210 
1211  // In AttemptBackendAssignment() we check:
1212  // - if input/output datatypes of the layer are float16
1213  // - if the layer is supported with these datatypes
1214  // If the layer is not supported (failing on ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED() in the CL framework),
1215  // we attempt to insert conversion layers on either side of the new FP32 layer.
1216  bool isFloat16 = false;
1217  for (auto type : inOutDataType)
1218  {
1219  if (type == DataType::Float16)
1220  {
1221  isFloat16 = true;
1222  break;
1223  }
1224  }
1225 
1226  if (layer->GetBackendId() == "Unknown" || isFloat16)
1227  {
1228  AssignBackendsIConnectable(optNetObjPtr,
1229  *it,
1230  errMessages,
1231  result,
1232  backendSettings,
1233  availablePreferredBackends);
1234  }
1235  }
1236 
1237  for (auto it = firstLayer; it != lastLayer; ++it)
1238  {
1239  auto layer = PolymorphicDowncast<Layer*>(*it);
1240 
1241  if(layer->GetType() == LayerType::Input)
1242  {
1243  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1244  layer->SetBackendId(connectedBackendId);
1245  }
1246  }
1247 
1248  return result;
1249 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), Float16, BackendSettings::GetAvailablePreferredBackends(), GetLayerInOutDatatype(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

Referenced by ApplyBackendOptimizations(), AssignBackends(), and Optimize().

◆ AssignBackends() [2/3]

OptimizationResult armnn::AssignBackends ( OptimizedNetworkImpl optNetObjPtr,
BackendSettings backendSettings,
SubgraphView subgraph,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1295 of file Network.cpp.

1299 {
1300  SubgraphView::IConnectableLayerIterator firstLayer = subgraph.begin();
1301  SubgraphView::IConnectableLayerIterator lastLayer = subgraph.end();
1302  return AssignBackends(optNetObjPtr,
1303  backendSettings,
1304  firstLayer,
1305  lastLayer,
1306  errMessages);
1307 }

References AssignBackends(), SubgraphView::begin(), and SubgraphView::end().

◆ AssignBackends() [3/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl optNetObjPtr,
BackendSettings backendSettings,
SubgraphView::IConnectableLayerIterator firstLayer,
SubgraphView::IConnectableLayerIterator lastLayer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1251 of file Network.cpp.

1256 {
1257  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1258  OptimizationResult result;
1259 
1260  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1261  if (availablePreferredBackends.empty())
1262  {
1263  std::stringstream failureMsg;
1264  failureMsg << "No preferred backends are available";
1265  ReportError(failureMsg.str(), errMessages);
1266 
1267  result.m_Error = true;
1268  return result;
1269  }
1270 
1271  for (auto it = firstLayer; it != lastLayer; ++it)
1272  {
1273  AssignBackendsIConnectable(optNetObjPtr,
1274  *it,
1275  errMessages,
1276  result,
1277  backendSettings,
1278  availablePreferredBackends);
1279  }
1280 
1281  for (auto it = firstLayer; it != lastLayer; ++it)
1282  {
1283  auto layer = PolymorphicDowncast<Layer*>(*it);
1284 
1285  if(layer->GetType() == LayerType::Input)
1286  {
1287  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1288  layer->SetBackendId(connectedBackendId);
1289  }
1290  }
1291 
1292  return result;
1293 }

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

◆ AssignBackendsIConnectable()

void armnn::AssignBackendsIConnectable ( OptimizedNetworkImpl optNetObjPtr,
IConnectableLayer it,
Optional< std::vector< std::string > & >  errMessages,
OptimizationResult result,
BackendSettings backendSettings,
std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 1076 of file Network.cpp.

1082 {
1083  auto ReturnError = [&](const Layer* layer)
1084  {
1085  return ReturnWithError(result, layer, backendSettings, errMessages);
1086  };
1087 
1088  auto layer = PolymorphicDowncast<Layer*>(it);
1089 
1090  if (layer->GetType() == LayerType::Input)
1091  {
1092  return;
1093  }
1094 
1095  std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1096 
1097  std::string reasonIfUnsupported;
1098  bool found = false;
1099  if (!CheckScaleSetOnQuantizedType(layer, errMessages))
1100  {
1101  // don't bomb immediately, find all the quantized outputs
1102  // which haven't had a scale set and report them all back.
1103  result.m_Error = true;
1104  }
1105 
1106  // First, try to assign the layer to its hinted backend
1107  if (layer->GetBackendHint().has_value() &&
1108  backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
1109  AttemptBackendAssignment(backendSettings,
1110  optNetObjPtr->GetGraph(),
1111  layer,
1112  layer->GetBackendHint().value(),
1113  inOutDataType[0],
1114  inOutDataType[1],
1115  availablePreferredBackends,
1116  reasonIfUnsupported,
1117  errMessages).IsOk())
1118  {
1119  found = true;
1120  backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
1121  }
1122  else
1123  {
1124  // Try to assign the layer to one of the preferred backends
1125  for (const auto& backend : availablePreferredBackends)
1126  {
1127  if (layer->GetBackendHint().has_value() &&
1128  layer->GetBackendHint().value() == backend)
1129  {
1130  continue; //Don't re-test the backend hint
1131  }
1132 
1133  OptimizationResult res = AttemptBackendAssignment(backendSettings,
1134  optNetObjPtr->GetGraph(),
1135  layer,
1136  backend,
1137  inOutDataType[0],
1138  inOutDataType[1],
1139  availablePreferredBackends,
1140  reasonIfUnsupported,
1141  errMessages);
1142 
1143  if (res.IsOk())
1144  {
1145  found = true;
1146  backendSettings.m_SelectedBackends.insert(backend);
1147  break;
1148  }
1149  else if (res.IsError())
1150  {
1151  result = res; // Cannot continue.
1152  // Note: we don't need to log the error as it would already
1153  // be logged in AttemptBackendAssignment().
1154  }
1155  else
1156  {
1157  ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
1158  }
1159  }
1160  }
1161 
1162  // If the layer is unsupported by any devices, log and return a null network.
1163  if (!found)
1164  {
1165  // NOTE: if the layer is not an operation queue type AND we do not have CpuRef as a
1166  // fallback, we should set the compute device on the layer to CpuRef (these layer types are not
1167  // available as accelerated operations, or are only available under certain
1168  // conditions; currently they comprise MemCopy, Constant and Permute)
1169  armnn::LayerType layerType = layer->GetType();
1170  if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
1171  layerType == armnn::LayerType::Constant ||
1172  layerType == armnn::LayerType::Permute))
1173  {
1174  BackendId cpuBackendId(armnn::Compute::CpuRef);
1175  layer->SetBackendId(cpuBackendId);
1176  backendSettings.m_SelectedBackends.insert(cpuBackendId);
1177  }
1178  else
1179  {
1180  result = ReturnError(layer);
1181  }
1182  }
1183 
1184 }

References ARMNN_ASSERT_MSG, AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Constant, CpuRef, OptimizedNetworkImpl::GetGraph(), GetLayerInOutDatatype(), Input, BackendSettings::IsBackendSupported(), BackendSettings::IsCpuRefUsed(), OptimizationResult::IsError(), OptimizationResult::IsOk(), OptimizationResult::IsWarningOnly(), OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MemCopy, Permute, and ReturnWithError().

Referenced by AssignBackends().

◆ AssignSplitId()

void armnn::AssignSplitId ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 309 of file SubgraphViewSelector.cpp.

310 {
311  // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned.
312  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
313  {
314  // We can only attach ourselves to the subgraph from this input if there isn't a cut here.
315  if (layerInfo.m_IsSelected == parentInfo.m_IsSelected)
316  {
317  // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs.
318  // This will be the case if the subgraph that we will become part of is already a dependency
319  // of one of the subgraphs that are input to this layer, e.g:
320  //
321  // 0 | The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X.
322  // / \ |
323  // 1 0 | We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0.
324  // \ / | We can however merge X into subgraph 1.
325  // X |
326  //
327  bool dependenciesOk = true;
328  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo)
329  {
330  // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer.
331  // Hence it is important that this is efficient - see PartialSubgraph class description.
332  if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get()))
333  {
334  dependenciesOk = false;
335  }
336  });
337 
338  if (dependenciesOk)
339  {
340  // Merge into the subgraph of this input. If we have already been merged into another subgraph
341  // (from another input of this layer), then merge both of them together.
342  if (layerInfo.m_Subgraph == nullptr)
343  {
344  layerInfo.m_Subgraph = parentInfo.m_Subgraph;
345  }
346  else
347  {
348  // We call MergeWith() ~ n times, where n is the number of inputs to this layer.
349  // Therefore it does not need to be as performant as HasAntecedent().
350  layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get());
351  }
352  }
353  }
354  });
355 
356  // If we weren't able to merge into an existing subgraph then we need to make a new one
357  if (layerInfo.m_Subgraph == nullptr)
358  {
359  layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>();
360  }
361 
362  // Record dependencies of the chosen subgraph based on the inputs of this layer.
363  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
364  {
365  // These functions are called ~n times, where n is the number of inputs to this layer.
366  // Therefore it does not need to be as performant as HasAntecedent().
367  if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get()))
368  {
369  layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get());
370  }
371  });
372 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ AttemptBackendAssignment()

OptimizationResult armnn::AttemptBackendAssignment ( BackendSettings backendSettings,
Graph graph,
Layer layer,
BackendId  backend,
DataType  dataTypeIn,
DataType  dataTypeOut,
const std::vector< BackendId > &  availablePreferredBackends,
std::string &  reasonIfUnsupported,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 847 of file Network.cpp.

856 {
857  OptimizationResult result;
858 
859  // Helper lambda to compose meaningful error message before returning with error
860  auto ReturnError = [&](const Layer* layer)
861  {
862  return ReturnWithError(result, layer, backendSettings, errMessages);
863  };
864 
865  // need to set the compute device on the layer
866  // before we can check if it is supported
867  layer->SetBackendId(backend);
868  std::string currentReasonIfUnsupported;
869 
870  // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
871  // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
872  // to be FP32 and inserting convert layers around the FP32 operator.
873  bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
874  reasonIfUnsupported += currentReasonIfUnsupported;
875  // This string matches the error message that is produced by ACL when attempting to run FP16 kernels on
876  // a CPU or a build that does not have FP16 support. We use this to check whether we should add
877  // conversion layers or not.
878  std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
879  if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
880  {
881  if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
882  {
883  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
884  && layer->GetType() != LayerType::ConvertFp32ToFp16
885  && layer->GetType() != LayerType::ConvertFp16ToFp32)
886  {
887  auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
888  {
889  if (layer.GetType() == LayerType::Constant)
890  {
891  ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
892 
893  auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
894 
895  if (info.GetDataType() == DataType::Float16)
896  {
897  std::vector<float> newValues(info.GetNumElements());
898 
899  armnnUtils::FloatingPointConverter::ConvertFloat16To32(
900  constantLayer->m_LayerOutput->GetConstTensor<Half>(),
901  info.GetNumElements(),
902  newValues.data());
903 
904  TensorInfo newInfo(info);
905  newInfo.SetDataType(DataType::Float32);
906  ConstTensor newInput(newInfo, newValues);
907  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
908 
909  layer.GetOutputSlot(0).SetTensorInfo(newInfo);
910  }
911  }
912  };
913 
914  bool checkType = false;
915 
916  for (auto inputSlot : layer->GetInputSlots())
917  {
918  auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
919  if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
920  {
921  if (connectedOutputSlot->GetNumConnections() == 1)
922  {
923  checkType = true;
924  ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
925  }
926  }
927  }
928 
929  // Insert FP16 -> FP32 conversion layer before current layer
930  std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
931  if (dataTypeIn == DataType::Float16)
932  {
933  convertFp16ToFp32Layers =
934  InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
935  }
936 
937  // Insert FP32 -> FP16 conversion layer after current layer
938  std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
939  if (dataTypeOut == DataType::Float16)
940  {
941  convertFp32ToFp16Layers =
942  InsertConvertFp32ToFp16LayersAfter(graph, *layer);
943  }
944 
945  // Assign a supported backend to the newly introduced conversion layers
946  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
947  {
948  bool supportedBackendFound = false;
949  std::string reasonIfUnsupported;
950 
951  // Try preferred backend first
952  layer->SetBackendId(preferredBackend);
953  if (IWorkloadFactory::IsLayerSupported(*layer,
954  EmptyOptional(),
955  reasonIfUnsupported))
956  {
957  supportedBackendFound = true;
958  }
959  else
960  {
961  for (const auto& backend : availablePreferredBackends)
962  {
963  // Skip preferred backend (we already determined that it is not supported)
964  if (backend == preferredBackend)
965  {
966  continue;
967  }
968 
969  layer->SetBackendId(backend);
970  if (IWorkloadFactory::IsLayerSupported(*layer,
971  EmptyOptional(),
972  reasonIfUnsupported))
973  {
974  supportedBackendFound = true;
975  break;
976  }
977  }
978  }
979 
980  return supportedBackendFound;
981  };
982 
983  for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
984  {
985  if (!AssignFirstSupportedBackend(convertLayer, backend))
986  {
987  return ReturnError(convertLayer);
988  }
989  }
990 
991  for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
992  {
993  if (!AssignFirstSupportedBackend(convertLayer, backend))
994  {
995  return ReturnError(convertLayer);
996  }
997  }
998 
999  return result;
1000  }
1001  }
1002 
1003  std::stringstream warningMsg;
1004  warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
1005  << " is not supported on requested backend " << layer->GetBackendId().Get()
1006  << " for input data type " << GetDataTypeName(dataTypeIn)
1007  << " and output data type " << GetDataTypeName(dataTypeOut)
1008  << " (reason: " << reasonIfUnsupported
1009  << "), falling back to the next backend.";
1010  ReportWarning(warningMsg.str(), errMessages);
1011 
1012  return OptimizationResult(true, false);
1013  }
1014  else
1015  {
1016  return result;
1017  }
1018 }

References Constant, FloatingPointConverter::ConvertFloat16To32(), ConvertFp16ToFp32, ConvertFp32ToFp16, Float16, Float32, BackendId::Get(), Layer::GetBackendId(), GetDataTypeName(), Layer::GetInputSlots(), GetLayerTypeAsCString(), Layer::GetOutputSlot(), Layer::GetType(), info, InsertConvertFp16ToFp32LayersBefore(), InsertConvertFp32ToFp16LayersAfter(), IWorkloadFactory::IsLayerSupported(), ConstantLayer::m_LayerOutput, ReportWarning(), ReturnWithError(), Layer::SetBackendId(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AssignBackendsIConnectable().

◆ BackendRegistryInstance()

◆ BatchNormImpl()

void BatchNormImpl ( const BatchNormalizationQueueDescriptor data,
Decoder< float > &  meanDecoder,
Decoder< float > &  varianceDecoder,
Decoder< float > &  betaDecoder,
Decoder< float > &  gammaDecoder,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file BatchNormImpl.cpp.

25 {
26  const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
27  const TensorShape inputShape = inputInfo.GetShape();
28 
29  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
30 
31  unsigned int inputBatches = inputShape[0];
32  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
33  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
34  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
35 
36  for (unsigned int c = 0; c < inputChannels; c++)
37  {
38  meanDecoder[c];
39  varianceDecoder[c];
40  betaDecoder[c];
41  gammaDecoder[c];
42  float mean = meanDecoder.Get();
43  float var = varianceDecoder.Get();
44  float beta = betaDecoder.Get();
45  float gamma = gammaDecoder.Get();
46 
47  float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
48  float add = beta - mult * mean;
49 
50  for (unsigned int n = 0; n < inputBatches; n++)
51  {
52  for (unsigned int h = 0; h < inputHeight; h++)
53  {
54  for (unsigned int w = 0; w < inputWidth; w++)
55  {
56  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
57  inputDecoder[index];
58  outputEncoder[index];
59  outputEncoder.Set(mult * inputDecoder.Get() + add);
60  }
61  }
62  }
63  }
64 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), GetTensorInfo(), DataLayoutIndexed::GetWidthIndex(), BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QueueDescriptor::m_Inputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().
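
The per-channel constants computed above are the standard batch normalization formula rearranged so that the innermost loop is a single multiply-add:

    output = gamma * (input - mean) / sqrt(var + eps) + beta
           = mult * input + add

with mult = gamma / sqrt(var + eps) and add = beta - mult * mean, exactly as calculated at lines 47-48 of the listing.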

◆ BatchToSpaceNd()

void BatchToSpaceNd ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const BatchToSpaceNdDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 50 of file BatchToSpaceNd.cpp.

55 {
56  unsigned int rank = inputInfo.GetNumDimensions();
57  if (rank != 3 && rank != 4 )
58  {
59  throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
60  CHECK_LOCATION());
61  }
62 
63  DataLayoutIndexed dataLayout = params.m_DataLayout;
64  unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
65 
66  TensorShape inputShape = inputInfo.GetShape();
67  TensorShape outputShape = outputInfo.GetShape();
68 
69  const unsigned int inputBatchSize = inputShape[0];
70  const unsigned int outputBatchSize = outputShape[0];
71 
72  const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
73  : inputShape[dataLayout.GetChannelsIndex()];
74 
75  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
76  const unsigned int inputWidth = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
77  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
78  const unsigned int outputWidth = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
79 
80  const unsigned int blockHeight = params.m_BlockShape[0];
81  const unsigned int blockWidth = (rank == 3) ? 1 : params.m_BlockShape[1];
82 
83  const unsigned int cropsTop = params.m_Crops[0].first;
84  const unsigned int cropsLeft = (rank == 3) ? 0 : params.m_Crops[1].first;
85 
86  for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
87  {
88  const unsigned int outBatch = inBatch % outputBatchSize;
89  const unsigned int spatialOffset = inBatch / outputBatchSize;
90 
91  for (unsigned int inH = 0; inH < inputHeight; ++inH)
92  {
93  const unsigned int outH = inH * blockHeight + spatialOffset / blockWidth - cropsTop;
94 
95  if (outH >= outputHeight)
96  {
97  continue;
98  }
99 
100  for (unsigned int inW = 0; inW < inputWidth; ++inW)
101  {
102  const unsigned int outW = inW * blockWidth + spatialOffset % blockWidth - cropsLeft;
103 
104  if (outW >= outputWidth)
105  {
106  continue;
107  }
108 
109  for (unsigned int c = 0; c < channels; c++)
110  {
111  unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
112  unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
113 
114  outputData[outOffset];
115  inputData[inOffset];
116  outputData.Set(inputData.Get());
117  }
118  }
119  }
120  }
121 }

References BatchToSpaceNd(), CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), BatchToSpaceNdDescriptor::m_BlockShape, BatchToSpaceNdDescriptor::m_Crops, BatchToSpaceNdDescriptor::m_DataLayout, Offset(), and Encoder< IType >::Set().

Referenced by BatchToSpaceNd(), and BatchToSpaceNdLayer::BatchToSpaceNdLayer().
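
As a worked illustration (hypothetical shapes, not taken from the sources): for an NHWC input of shape [4, 1, 1, 1], block shape {2, 2} and zero crops, the output has shape [1, 2, 2, 1]. With outputBatchSize = 1, every input batch maps to outBatch = 0 and spatialOffset = inBatch, so input batches 0, 1, 2 and 3 land at (outH, outW) = (0, 0), (0, 1), (1, 0) and (1, 1) respectively, i.e. the four input batches are tiled into the 2x2 spatial block of the single output batch.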

◆ BuildAddMulAddSlotLists()

void armnn::BuildAddMulAddSlotLists ( bool  handleReLu,
bool  multipleOutputs,
std::vector< SlotListType > &  inputLayersSlotLists,
std::vector< SlotListType > &  outputLayersSlotLists 
)

Definition at line 36 of file NeonBackendOptimizationUtils.hpp.

40 {
41  // Build input slot list
42  inputLayersSlotLists.push_back({0, 1}); // Add
43  inputLayersSlotLists.push_back({1}); // Mul
44  inputLayersSlotLists.push_back({1}); // Add
45  if (handleReLu)
46  {
47  inputLayersSlotLists.push_back({}); // Relu
48  }
49 
50  // Build output slot list
51  if (multipleOutputs)
52  {
53  outputLayersSlotLists.push_back({0}); // Add
54  }
55  else
56  {
57  outputLayersSlotLists.push_back({}); // Add
58  }
59  outputLayersSlotLists.push_back({}); // Mul
60  if (handleReLu)
61  {
62  outputLayersSlotLists.push_back({}); // Add
63  outputLayersSlotLists.push_back({0}); // Relu
64  }
65  else
66  {
67  outputLayersSlotLists.push_back({0}); // Add
68  }
69 }

◆ BuildAddMulAddTensorInfoLists()

bool armnn::BuildAddMulAddTensorInfoLists ( Type *  layerList[4],
unsigned int &  numInputs,
unsigned int &  numOutputs,
std::vector< TensorInfo > &  inputInfos,
std::vector< TensorInfo > &  outputInfos,
const ActivationDescriptor *&  activationDescriptor,
bool &  fuseReLu 
)

Definition at line 87 of file NeonBackendOptimizationUtils.hpp.

94 {
95  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
96  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
97  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
98 
99  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[0], BinaryOperation::Add));
100  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[1], BinaryOperation::Mul));
101  ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[2], BinaryOperation::Add));
102 
103  fuseReLu = (layerList[3] != nullptr);
104  if (fuseReLu)
105  {
106  activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
107  ARMNN_THROW_INVALIDARG_IF_FALSE((activationDescriptor->m_Function == ActivationFunction::ReLu) ||
108  (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
109  }
110 
111  numInputs = 0;
112  numOutputs = 0;
113 
114  // Ensure that there are 6 input slots in the add/mul/add layers
115  // we are going to replace
116  unsigned int layerIdx = 0;
117  unsigned int inputSlotCount = 0;
118  for (layerIdx = 0; layerIdx < 3; ++layerIdx)
119  {
120  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
121  {
122  InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
123  OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
124  if (outputSlot)
125  {
126  if (layerIdx == 0)
127  {
128  // Always count the input connections of the first add
129  inputInfos.push_back(inputSlot->GetTensorInfo());
130  numInputs++;
131  }
132  else
133  {
134  // For subsequent layers, we skip connections to the previous layers in the counting
135  if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
136  {
137  TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
138  if (numInputs == 2 || numInputs == 3)
139  {
140  // Workaround the broadcast optimization to collapse shapes such as
141  // [1, 1, 1, 2] to [2] as required by the backend
142  if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
143  {
144  OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
145  if (previousLayerSlot)
146  {
147  if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
148  {
149  // First update the TensorInfo in the constant owning layer
150  previousLayerSlot->SetTensorInfo(inputSlotInfo);
151  // Then update the TensorInfo in the workload for the owning layer
152  ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
153  &previousLayerSlot->GetOwningLayer());
154  layer->m_LayerOutput
155  = std::make_unique<ScopedTensorHandle>(
156  ConstTensor(inputSlotInfo,
157  layer->m_LayerOutput.get()->GetConstTensor<void>()));
158  }
159  }
160  }
161  }
162  inputInfos.push_back(inputSlotInfo);
163  numInputs++;
164  }
165  }
166  inputSlotCount++;
167  }
168  }
169  }
170 
171  // Check the input counts
172  bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
173  if (! validInputCount)
174  {
175  return false;
176  }
177 
178  const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
179  for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
180  {
181  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
182  {
183  OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
184 
185  for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
186  {
187  InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
188  if (layerIdx < (maxIdx-1))
189  {
190  if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
191  {
192  outputInfos.push_back(outputSlot->GetTensorInfo());
193  numOutputs++;
194  }
195  }
196  else if (layerList[layerIdx] != nullptr)
197  {
198  outputInfos.push_back(outputSlot->GetTensorInfo());
199  numOutputs++;
200  }
201  }
202  }
203  }
204 
205  // Check the output count
206  bool validOutputCount = (outputInfos.size() > 0);
207  if (! validOutputCount)
208  {
209  return false;
210  }
211 
212  return true;
213 }

References Add, ARMNN_THROW_INVALIDARG_IF_FALSE, BoundedReLu, CollapseLeadingUnitDimensions(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), OutputSlot::GetNumConnections(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), OutputSlot::GetTensorInfo(), Layer::GetType(), IsSequenceLayerType(), ActivationDescriptor::m_Function, ConstantLayer::m_LayerOutput, Mul, ReLu, and OutputSlot::SetTensorInfo().

◆ CalcLevel()

int armnn::CalcLevel ( const Event eventPtr)

Definition at line 246 of file Profiling.cpp.

247 {
248  int level = 0;
249  while (eventPtr != nullptr)
250  {
251  eventPtr = eventPtr->GetParentEvent();
252  level++;
253  }
254  return level;
255 }

References Event::GetParentEvent().

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults(), and ProfilerImpl::PopulateParent().

◆ CalculateEdgeStrategy()

EdgeStrategy armnn::CalculateEdgeStrategy ( BackendsMap backends,
ITensorHandleFactory::FactoryId  srcFactoryId,
const Layer layer,
const Layer connectedLayer,
TensorHandleFactoryRegistry registry,
bool  importEnabled 
)

Definition at line 1723 of file Network.cpp.

1729 {
1730  auto toBackend = backends.find(connectedLayer.GetBackendId());
1731  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1732 
1733  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1734 
1735  // Legacy API check for backward compatibility
1736  if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1737  {
1738  if (layer.GetBackendId() != connectedLayer.GetBackendId())
1739  {
1740  return EdgeStrategy::CopyToTarget;
1741  }
1742  else
1743  {
1744  return EdgeStrategy::DirectCompatibility;
1745  }
1746  }
1747 
1748  // TensorHandleFactory API present, so perform more sophisticated strategies.
1749  // Dst Output layers don't require copy because they use import or map/unmap
1750  if (connectedLayer.GetType() == LayerType::Output)
1751  {
1752  return EdgeStrategy::DirectCompatibility;
1753  }
1754 
1755  // Search for direct match in prefs
1756  for (auto&& pref : dstPrefs)
1757  {
1758  if (pref == srcFactoryId)
1759  {
1760  return EdgeStrategy::DirectCompatibility;
1761  }
1762  }
1763 
1764  // Search for export/import options
1765  ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1766  if (srcFactory->GetExportFlags() != 0 && importEnabled)
1767  {
1768  for (auto&& pref : dstPrefs)
1769  {
1770  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1771 
1772  // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1773  if (!dstFactory) {
1774  continue;
1775  }
1776  if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1777  {
1778  auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1779  auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1780  &connectedLayer,
1781  CapabilityClass::PaddingRequired);
1782  auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1783  auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1784  &connectedLayer,
1785  CapabilityClass::FallbackImportDisabled);
1786  // Do not require memory copy if the source and destination do not require padding.
1787  if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1788  {
1789  return EdgeStrategy::ExportToTarget;
1790  }
1791  }
1792  }
1793  }
1794 
1795  // Search for copy options via map/unmap
1796  if (srcFactory->SupportsMapUnmap())
1797  {
1798  for (auto&& pref : dstPrefs)
1799  {
1800  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1801  if (dstFactory && dstFactory->SupportsMapUnmap())
1802  {
1803  return EdgeStrategy::CopyToTarget;
1804  }
1805  }
1806  }
1807 
1808  return EdgeStrategy::Undefined;
1809 }

References ARMNN_ASSERT_MSG, CopyToTarget, DirectCompatibility, ExportToTarget, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, PaddingRequired, ITensorHandleFactory::SupportsMapUnmap(), and Undefined.

Referenced by SelectTensorHandleStrategy().
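
As a concrete example of the fall-through order above (an illustrative reading, not additional behaviour): if importEnabled is false, the export/import search is skipped entirely, so an edge between two different backends whose factories both support map/unmap is assigned EdgeStrategy::CopyToTarget even when their import/export flags would otherwise be compatible.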

◆ CalculateGatherNdKeyIndices()

std::map< std::string, unsigned int > CalculateGatherNdKeyIndices ( TensorInfo  inputInfo0,
TensorInfo  inputInfo1 
)

Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)

Parameters
inputInfo0 - TensorInfo of the corresponding input tensor: params
inputInfo1 - TensorInfo of the corresponding input tensor: indices
Returns
A map with names and values for N, ND, K, W, C

Definition at line 312 of file WorkloadUtils.cpp.

313 {
314  std::vector<unsigned int> paramsShape;
315  for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
316  {
317  paramsShape.push_back(inputInfo0.GetShape()[i]);
318  }
319 
320  std::vector<unsigned int> indicesShape;
321  for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
322  {
323  indicesShape.push_back(inputInfo1.GetShape()[i]);
324  }
325 
326  std::map<std::string, unsigned int> keyIndices;
327 
328  // N: number of batches
329  keyIndices["N"] = 1;
330 
331  // ND: number of dimensions that are sliced from params
332  keyIndices["ND"] = indicesShape.back();
333 
334  // W: number of indices in each batch (all but the last dimension)
335  keyIndices["W"] =
336  static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
337  std::end(indicesShape) - 1,
338  1,
339  std::multiplies<>() ));
340  // K: range of each index
341  keyIndices["K"] =
342  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
343  std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
344  1,
345  std::multiplies<>() ));
346  // C: number of channels for each index
347  keyIndices["C"] =
348  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
349  std::end(paramsShape),
350  1,
351  std::multiplies<>() ));
352 
353  return keyIndices;
354 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by ClGatherNdWorkload::ClGatherNdWorkload(), ClGatherNdWorkloadValidate(), NeonGatherNdWorkload::NeonGatherNdWorkload(), and NeonGatherNdWorkloadValidate().
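
A minimal usage sketch (a code fragment; the shapes and headers are illustrative, the function itself lives in WorkloadUtils.cpp as noted above). For params of shape [5, 5, 2] and indices of shape [3, 2] the helper returns N = 1, ND = 2, W = 3, K = 5 * 5 = 25 and C = 2:

    #include <armnn/Tensor.hpp>
    #include <map>
    #include <string>

    // Hypothetical shapes chosen purely for illustration.
    armnn::TensorInfo paramsInfo (armnn::TensorShape({5, 5, 2}), armnn::DataType::Float32);
    armnn::TensorInfo indicesInfo(armnn::TensorShape({3, 2}),    armnn::DataType::Signed32);

    std::map<std::string, unsigned int> keys = armnn::CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
    // keys["N"] == 1, keys["ND"] == 2, keys["W"] == 3, keys["K"] == 25, keys["C"] == 2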

◆ CalculateSlotOption()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOption ( BackendsMap backends,
OutputSlot outputSlot,
TensorHandleFactoryRegistry registry,
bool  exportEnabled 
)

Definition at line 1573 of file Network.cpp.

1577 {
1578  // First ensure the source backend supports the TensorHandle API
1579  Layer& layer = outputSlot.GetOwningLayer();
1580  auto frmBackend = backends.find(layer.GetBackendId());
1581  if (frmBackend == backends.end() ||
1582  !frmBackend->second->SupportsTensorAllocatorAPI())
1583  {
1584  return ITensorHandleFactory::LegacyFactoryId;
1585  }
1586 
1587  bool outputConnection = false;
1588  for (auto&& connection : outputSlot.GetConnections())
1589  {
1590  const Layer& connectedLayer = connection->GetOwningLayer();
1591  if (connectedLayer.GetType() == LayerType::Output)
1592  {
1593  outputConnection = true;
1594  }
1595  }
1596 
1597  IBackendInternal* srcBackend = frmBackend->second.get();
1598  auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1599 
1600  // Initialize the scores
1601  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1602  for (auto&& pref : srcPrefs)
1603  {
1604  if (exportEnabled)
1605  {
1606  ITensorHandleFactory* factory = registry.GetFactory(pref);
1607  if (outputConnection)
1608  {
1609  // Check if this is a fallback case
1610  bool fallbackConnection = false;
1611  for (auto&& inputSlot : layer.GetInputSlots())
1612  {
1613  if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1614  {
1615  fallbackConnection = true;
1616  }
1617  }
1618  if (fallbackConnection)
1619  {
1620  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1621  // Cannot use factory import if fallback import is not supported.
1622  if (!factoryCap.empty())
1623  {
1624  continue;
1625  }
1626  }
1627  else if (factory->GetExportFlags() == 0)
1628  {
1629  continue;
1630  }
1631  }
1632  if (!outputConnection)
1633  {
1634  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1635  // Cannot use factory import if fallback import is not supported.
1636  if (!factoryCap.empty())
1637  {
1638  continue;
1639  }
1640  }
1641 
1642  }
1643  else
1644  {
1645  // Only consider factories that support map/unmap
1646  ITensorHandleFactory* factory = registry.GetFactory(pref);
1647  if (!factory->SupportsMapUnmap())
1648  {
1649  // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1650  continue;
1651  }
1652  }
1653 
1654 
1655  auto it = factoryScores.find(pref);
1656  if (it == factoryScores.end())
1657  {
1658  // Add new score to the table
1659  factoryScores[pref] = 0;
1660  }
1661  }
1662 
1663  // Score each handle factory based on how many times it requires copies on the slot connections
1664  for (auto&& connection : outputSlot.GetConnections())
1665  {
1666  const Layer& connectedLayer = connection->GetOwningLayer();
1667 
1668  auto toBackend = backends.find(connectedLayer.GetBackendId());
1669  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1670 
1671  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1672  for (auto&& src : srcPrefs)
1673  {
1674  if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1675  {
1676  continue;
1677  }
1678 
1679  for (auto&& dst : dstPrefs)
1680  {
1681  if (RequiresCopy(src, dst, registry))
1682  {
1683  // A copy would be required for this connection, so increase the score (lower scores are preferred)
1684  factoryScores[src]++;
1685  break;
1686  }
1687  }
1688  }
1689  }
1690 
1691  // Find the lowest score
1692  int minScore = std::numeric_limits<int>::max();
1693  for (auto it : factoryScores)
1694  {
1695  minScore = std::min(minScore, it.second);
1696  }
1697 
1698  // Collect factories matching the best (lowest) score
1699  std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1700  for (auto it : factoryScores)
1701  {
1702  if (it.second == minScore)
1703  {
1704  optimalFactories.push_back(it.first);
1705  }
1706  }
1707 
1708  // For all compatible Factories matching the best score, find the preferred one for the current layer.
1709  for (auto&& srcPref : srcPrefs)
1710  {
1711  for (auto&& comp : optimalFactories)
1712  {
1713  if (comp == srcPref)
1714  {
1715  return comp;
1716  }
1717  }
1718  }
1719 
1720  return ITensorHandleFactory::LegacyFactoryId;
1721 }

References ARMNN_ASSERT_MSG, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), OutputSlot::GetConnections(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), IBackendInternal::GetHandleFactoryPreferences(), Layer::GetInputSlots(), OutputSlot::GetOwningLayer(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, RequiresCopy(), and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

◆ CalculateSlotOptionForInput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForInput ( BackendsMap backends,
OutputSlot slot,
TensorHandleFactoryRegistry registry,
bool  importEnabled 
)

Definition at line 1478 of file Network.cpp.

1482 {
1483  Layer& layer = slot.GetOwningLayer();
1484  ARMNN_ASSERT(layer.GetType() == LayerType::Input);
1485 
1486  // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1487  // doesn't matter which backend it is assigned to because they all use the same implementation, which
1488  // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1489  // select a factory with maximum compatibility with the layers connected to the InputLayer.
1490 
1491  // First ensure the source backend supports the TensorHandle API
1492  auto frmBackend = backends.find(layer.GetBackendId());
1493  if (frmBackend == backends.end() ||
1494  !frmBackend->second->SupportsTensorAllocatorAPI())
1495  {
1496  return ITensorHandleFactory::LegacyFactoryId;
1497  }
1498 
1499  // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1500  // fewest copies.
1501  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1502  int topScore = 0;
1503  ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
1504 
1505  for (auto&& connection : slot.GetConnections())
1506  {
1507 
1508  const Layer& connectedLayer = connection->GetOwningLayer();
1509 
1510  auto toBackend = backends.find(connectedLayer.GetBackendId());
1511  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1512 
1513  if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1514  {
1515  // The destination backend does not support the tensor allocator API, move to the next one
1516  continue;
1517  }
1518 
1519  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1520  for (auto&& dst : dstPrefs)
1521  {
1522  // Input layers use the mem copy workload or import, so the selected factory must
1523  // support either the map/unmap API or Import API
1524  ITensorHandleFactory* factory = registry.GetFactory(dst);
1525  if (importEnabled && factory->GetImportFlags() == 0)
1526  {
1527  continue;
1528  }
1529  else if (!importEnabled && !factory->SupportsMapUnmap())
1530  {
1531  continue;
1532  }
1533 
1534  auto it = factoryScores.find(dst);
1535  if (it == factoryScores.end())
1536  {
1537  // Add new score to the table
1538  factoryScores[dst] = 0;
1539  if (topChoice == ITensorHandleFactory::LegacyFactoryId)
1540  {
1541  topChoice = dst;
1542  }
1543  }
1544  else
1545  {
1546  // Increase the score
1547  factoryScores[dst]++;
1548 
1549  // Track the best option
1550  if (factoryScores[dst] > topScore)
1551  {
1552  topScore = factoryScores[dst];
1553  topChoice = dst;
1554  }
1555  }
1556  }
1557  }
1558 
1559  return topChoice;
1560 }

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, Layer::GetBackendId(), OutputSlot::GetConnections(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), OutputSlot::GetOwningLayer(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

◆ CalculateSlotOptionForOutput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForOutput ( BackendsMap backends,
OutputSlot slot,
TensorHandleFactoryRegistry registry 
)

Definition at line 1563 of file Network.cpp.

1566 {
1567  IgnoreUnused(backends, slot, registry);
1568  return ITensorHandleFactory::DeferredFactoryId;
1569 }

References ITensorHandleFactory::DeferredFactoryId, and IgnoreUnused().

Referenced by SelectTensorHandleStrategy().

◆ ChainReduceLayers()

std::vector<IConnectableLayer*> armnn::ChainReduceLayers ( OptimizationViews optimizationViews,
LayerType baseLayer,
ReduceDescriptor desc 
)

Definition at line 279 of file ArmComputeSubgraphUtils.hpp.

282 {
283  // Vector of new chained layers, used for substitution.
284  std::vector<IConnectableLayer*> layers;
285 
286  // Vector of axes so each layer is reshaped correctly.
287  std::vector<uint32_t> axes;
288  unsigned int recalulatedAxis = 0;
289 
290  for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
291  {
292  // Get TensorInfo from base layer and reduce shape using axis.
293  TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
294 
295  axes.emplace_back(desc.m_vAxis[i]);
296 
297  const TensorInfo& reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
298  axes,
299  desc.m_KeepDims);
300 
301  // Create a vector for the single axis to be assigned to the descriptor.
302  // Update the axis so that the reduce layers are created correctly when keepDims is not set.
303  std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalulatedAxis);
304 
305  // Create a descriptor and assign single axis.
306  ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
307  newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
308 
309  // Add new layer to graph.
310  std::string layerName = "reduce_layer_" + std::to_string(i);
311 
312  Layer* replacementLayer = PolymorphicDowncast<Layer*>(
313  optimizationViews.GetINetwork()->AddReduceLayer(newReduceDescriptor,
314  layerName.c_str()));
315 
316  // Connect previous layer with new layer.
317  // The first and last layer will be connected when the subgraph is replaced.
318  if (!layers.empty())
319  {
320  layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
321  }
322 
323  // Set updated tensorInfo for new layer.
324  replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
325 
326  if (!desc.m_KeepDims)
327  {
328  recalulatedAxis++;
329  }
330 
331  layers.emplace_back(replacementLayer);
332  }
333 
334  // Check if the TensorInfo from the last layer equals the inferred output from the original layer.
335  ARMNN_ASSERT(baseLayer->GetOutputSlot(0).GetTensorInfo() ==
336  PolymorphicDowncast<Layer*>(layers.back())->GetOutputSlot().GetTensorInfo());
337 
338  return layers;
339 }

References INetwork::AddReduceLayer(), ARMNN_ASSERT, ComputeReductionTensorShape(), OptimizationViews::GetINetwork(), Layer::GetInputSlot(), Layer::GetOutputSlot(), ReduceDescriptor::m_KeepDims, ReduceDescriptor::m_vAxis, and OutputSlot::SetTensorInfo().
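
As a worked illustration (hypothetical shapes, not taken from the sources): a Reduce layer with m_vAxis = {1, 2} and m_KeepDims = false applied to a [1, 4, 4, 8] input is replaced by two single-axis Reduce layers. The first is given axis 1 and produces [1, 4, 8]; because keepDims is false, recalulatedAxis is incremented, so the second is given axis 2 - 1 = 1 and reduces the intermediate [1, 4, 8] down to [1, 8], matching the inferred output of the original layer.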

◆ CheckFlag()

bool armnn::CheckFlag ( MemorySourceFlags  flags,
MemorySource  source 
)
inline

Definition at line 41 of file MemorySources.hpp.

42 {
43  return (static_cast<MemorySourceFlags>(source) & flags) != 0;
44 }

Referenced by LoadedNetwork::ImportInputs(), and LoadedNetwork::ImportOutputs().
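
A minimal usage sketch (a code fragment with illustrative values; the include path is indicative): MemorySourceFlags is a plain bitmask, so individual MemorySource values can be combined with bitwise OR and then tested with CheckFlag().

    #include <armnn/MemorySources.hpp>

    armnn::MemorySourceFlags flags =
        static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc) |
        static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::DmaBuf);

    bool hasMalloc  = armnn::CheckFlag(flags, armnn::MemorySource::Malloc);  // true
    bool hasGralloc = armnn::CheckFlag(flags, armnn::MemorySource::Gralloc); // false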

◆ CheckFp16Support()

bool armnn::CheckFp16Support ( BackendsMap backends,
const std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 1029 of file Network.cpp.

1031 {
1032  bool hasFp16 = false;
1033  // Check if the first preferred backend has FP16 support
1034  auto firstBackend = availablePreferredBackends[0];
1035  auto backendObjPtr = backends.find(firstBackend)->second.get();
1036  ARMNN_ASSERT(backendObjPtr);
1037  auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
1038  auto backendCapabilities = backendObjPtr->GetCapabilities();
1039 
1040  if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
1041  {
1042  // The first preferred backend has FP16 support. Enable reducing FP32 to FP16 when fp16-turbo-mode is enabled.
1043  hasFp16 = true;
1044  ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
1045  << ", has FP16 support.";
1046  }
1047  else
1048  {
1049  ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
1050  << ", does not have FP16 support. "
1051  << "The FP16 turbo mode option will be disabled. It will run using FP32.";
1052  }
1053 
1054  // Check if the rest of the available preferred backends have FP16 support
1055  for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
1056  {
1057  auto backend = availablePreferredBackends[i];
1058  backendObjPtr = backends.find(backend)->second.get();
1059  backendCapabilities = backendObjPtr->GetCapabilities();
1060  if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
1061  {
1062  ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
1063  << "It will run using FP32 when falling back to this backend.";
1064  }
1065  else
1066  {
1067  ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
1068  }
1069  }
1070 
1071  return hasFp16;
1072 }

References ARMNN_ASSERT, ARMNN_LOG, debug, HasMatchingCapability(), and warning.

Referenced by Optimize().

◆ CheckScaleSetOnQuantizedType()

bool armnn::CheckScaleSetOnQuantizedType ( Layer layer,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 795 of file Network.cpp.

796 {
797  bool noErrors = true;
798  unsigned int numOutputs = layer->GetNumOutputSlots();
799  for (unsigned int i = 0; i < numOutputs; i++) {
800  OutputSlot& outputSlot = layer->GetOutputSlot(i);
801  TensorInfo info = outputSlot.GetTensorInfo();
802  auto quantizationDataType = info.GetDataType();
803  auto quantizationScales = info.GetQuantizationScales();
804  // For any Quantized Tensor ensure scale(s) are set
805  switch(quantizationDataType) {
806  case DataType::QAsymmU8:
807  case DataType::QSymmS16:
808  case DataType::QSymmS8:
809  case DataType::QAsymmS8:
810  if ((quantizationDataType == DataType::QAsymmU8 || quantizationDataType == DataType::QAsymmS8)
811  && info.HasPerAxisQuantization()) {
812  throw InvalidArgumentException("Per Axis Quantization is not supported in "
813  "Asymmetric Quantization Datatype.");
814  }
815  if ((!info.HasPerAxisQuantization() && info.GetQuantizationScale() == 0.f)
816  || (info.HasPerAxisQuantization() && (quantizationScales.end() !=
817  std::find(quantizationScales.begin(), quantizationScales.end(), 0.f)))) {
818  noErrors = false;
819  std::stringstream ss;
820  ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
821  << " (" << layer->GetNameStr() << ") is of type"
822  << " Quantized value but the scale parameter has not been set";
823  ReportError(ss.str(), errMessages);
824  }
825  // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
826  if (!info.HasPerAxisQuantization() && quantizationDataType == DataType::QAsymmU8 &&
827  (info.GetQuantizationScale() != (1.0f / 256.0f) ||
828  info.GetQuantizationOffset() != 0) &&
829  layer->GetType() == armnn::LayerType::Softmax) {
830  std::stringstream ss;
831  ss << "Quantization parameters for Softmax layer (Scale: " <<
832  info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
833  ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
834  ARMNN_LOG(warning) << ss.str();
835  info.SetQuantizationScale((1.0f / 256.0f));
836  info.SetQuantizationOffset(0);
837  outputSlot.SetTensorInfo(info);
838  }
839  break;
840  default:
841  break;
842  }
843  }
844  return noErrors;
845 }

References ARMNN_LOG, GetLayerTypeAsCString(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Layer::GetType(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, ReportError(), OutputSlot::SetTensorInfo(), Softmax, and warning.

Referenced by AssignBackendsIConnectable().

◆ CheckSupportRule()

bool armnn::CheckSupportRule ( F  rule,
Optional< std::string & >  reasonIfUnsupported,
const char *  reason 
)

Definition at line 38 of file LayerSupportRules.hpp.

39 {
40  bool supported = rule();
41  if (!supported && reason)
42  {
43  reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
44  }
45  return supported;
46 }

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by RefLayerSupport::IsActivationSupported(), RefLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchMatMulSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsBroadcastToSupported(), RefLayerSupport::IsCastSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConstantSupported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDebugSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDequantizeSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsDivisionSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), RefLayerSupport::IsFillSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsFullyConnectedSupported(), RefLayerSupport::IsGatherNdSupported(), RefLayerSupport::IsGatherSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), RefLayerSupport::IsLayerSupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsMaximumSupported(), RefLayerSupport::IsMeanSupported(), RefLayerSupport::IsMemCopySupported(), RefLayerSupport::IsMinimumSupported(), RefLayerSupport::IsMultiplicationSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsPreluSupported(), RefLayerSupport::IsQuantizeSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsReverseV2Supported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsSubtractionSupported(), RefLayerSupport::IsTileSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), and RefLayerSupport::IsUnidirectionalSequenceLstmSupported().
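
A minimal sketch (a code fragment; the rule here is a hypothetical lambda, whereas the callers listed above pass the rule structs defined in LayerSupportRules.hpp): the helper simply evaluates the rule and, on failure, appends the reason string followed by a newline.

    #include <armnn/Optional.hpp>
    #include <string>

    std::string reasons;
    armnn::Optional<std::string&> reasonRef(reasons);

    bool supported = armnn::CheckSupportRule([] { return false; },   // hypothetical rule that always fails
                                             reasonRef,
                                             "Reference only supports up to 4D tensors");
    // supported == false; reasons now ends with "Reference only supports up to 4D tensors\n"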

◆ ClAbsWorkloadValidate()

arm_compute::Status ClAbsWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file ClAbsWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
25 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().
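All of the Cl*WorkloadValidate helpers follow this shape: build arm_compute::TensorInfo objects from the ArmNN TensorInfos and delegate to the matching CL function's validate(). A minimal sketch of turning the returned arm_compute::Status into the bool-plus-reason form used by the layer-support interface; the include path is an assumption, since these helpers live in the CL backend sources (src/backends/cl/workloads) rather than the public armnn/ headers, and the wrapper function name is illustrative.

#include <string>
#include <armnn/Tensor.hpp>
#include "ClAbsWorkload.hpp"   // assumed include path into src/backends/cl/workloads

bool IsAbsSupportedOnGpuAcc(const armnn::TensorInfo& input,
                            const armnn::TensorInfo& output,
                            std::string& reasonIfUnsupported)
{
    const arm_compute::Status status = armnn::ClAbsWorkloadValidate(input, output);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        reasonIfUnsupported = status.error_description();
        return false;
    }
    return true;
}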

◆ ClActivationWorkloadValidate()

arm_compute::Status ClActivationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor 
)

Definition at line 17 of file ClActivationWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
26 
27  return arm_compute::CLActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }

Referenced by ClLayerSupport::IsActivationSupported().

◆ ClAdditionValidate()

arm_compute::Status ClAdditionValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 45 of file ClAdditionWorkload.cpp.

49 {
50  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
51  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
52  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
53 
54  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
55  activationDescriptor);
56 
57  const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
58  &aclInput1Info,
59  &aclOutputInfo,
60  g_AclConvertPolicy,
61  activationInfo);
62 
63  return aclStatus;
64 }

Referenced by ClLayerSupport::IsAdditionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().
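A sketch of validating an addition with a fused bounded ReLU, the case ClBackend::OptimizeSubgraphView exercises when it folds an activation into the preceding addition. The include path and the assumption that the last argument is an optional pointer (nullptr when nothing is fused) follow the CL backend sources; the wrapper function name is illustrative.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include "ClAdditionWorkload.hpp"   // assumed include path into src/backends/cl/workloads

arm_compute::Status ValidateAddWithFusedRelu()
{
    const armnn::TensorInfo in0(armnn::TensorShape({1, 8, 8, 16}), armnn::DataType::Float32);
    const armnn::TensorInfo in1(armnn::TensorShape({1, 8, 8, 16}), armnn::DataType::Float32);
    const armnn::TensorInfo out(armnn::TensorShape({1, 8, 8, 16}), armnn::DataType::Float32);

    // Describe the activation that would be fused into the addition.
    armnn::ActivationDescriptor relu;
    relu.m_Function = armnn::ActivationFunction::BoundedReLu;
    relu.m_A = 6.0f;   // upper bound

    return armnn::ClAdditionValidate(in0, in1, out, &relu);
}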

◆ ClArgMinMaxWorkloadValidate()

arm_compute::Status ClArgMinMaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ArgMinMaxDescriptor descriptor 
)

Definition at line 31 of file ClArgMinMaxWorkload.cpp.

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }

Referenced by ClLayerSupport::IsArgMinMaxSupported().

◆ ClBackendId()

constexpr const char* armnn::ClBackendId ( )
constexpr

Definition at line 10 of file ClBackendId.hpp.

10 { return "GpuAcc"; }

Referenced by ClBackend::GetIdStatic().
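The string returned here is the public id of the OpenCL backend, used when asking the optimizer to place a graph on the GPU. A minimal public-API sketch; the network is just an input wired to an output so that Optimize has something to place.

#include <vector>
#include <armnn/ArmNN.hpp>

int main()
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* input  = network->AddInputLayer(0);
    armnn::IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo(armnn::TensorShape({1, 4}), armnn::DataType::Float32));

    // "GpuAcc" is the id returned by ClBackendId().
    std::vector<armnn::BackendId> backends = { armnn::BackendId("GpuAcc") };
    armnn::IOptimizedNetworkPtr optimized =
        armnn::Optimize(*network, backends, runtime->GetDeviceSpec());

    return optimized ? 0 : 1;
}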

◆ ClBatchMatMulValidate()

arm_compute::Status ClBatchMatMulValidate ( const TensorInfo inputInfoX,
const TensorInfo inputInfoY,
const TensorInfo outputInfo,
const BatchMatMulDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file ClBatchMatMulWorkload.cpp.

29 {
30  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
31  {
32  throw Exception("Support for adjoint not implemented.");
33  }
34  if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
35  {
36  throw Exception("Only supported the MatMul in the last 2 dimensions");
37  }
38 
39  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
40  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
41  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
42 
43  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
44  aclInputInfoX.set_are_values_constant(false);
45  aclInputInfoY.set_are_values_constant(false);
46 
47  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
48  activationDescriptor);
49 
50  arm_compute::MatMulInfo matMulInfo;
51  matMulInfo.adj_lhs(descriptor.m_TransposeX);
52  matMulInfo.adj_rhs(descriptor.m_TransposeY);
53 
54  return arm_compute::CLMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, activationInfo);
55 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, and NCHW.

Referenced by ClLayerSupport::IsBatchMatMulSupported().
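A sketch of the shapes this helper accepts: two rank-3 inputs multiplied over their last two dimensions, no adjoint, optional transposes via the descriptor. The include path, the assumption that the fused-activation argument is a pointer, and the wrapper name are all illustrative.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include "ClBatchMatMulWorkload.hpp"   // assumed include path

arm_compute::Status ValidateBatchMatMul()
{
    // [2, 3, 4] x [2, 4, 5] -> [2, 3, 5], batched over the leading dimension.
    const armnn::TensorInfo x(armnn::TensorShape({2, 3, 4}), armnn::DataType::Float32);
    const armnn::TensorInfo y(armnn::TensorShape({2, 4, 5}), armnn::DataType::Float32);
    const armnn::TensorInfo out(armnn::TensorShape({2, 3, 5}), armnn::DataType::Float32);

    armnn::BatchMatMulDescriptor descriptor;   // defaults: no transpose, no adjoint
    return armnn::ClBatchMatMulValidate(x, y, out, descriptor, nullptr /*no fused activation*/);
}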

◆ ClBatchNormalizationValidate()

arm_compute::Status ClBatchNormalizationValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 19 of file ClBatchNormalizationFloatWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInputInfo =
29  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
30  const arm_compute::TensorInfo aclOutputInfo =
31  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclMeanInfo =
33  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclVarInfo =
35  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
36  const arm_compute::TensorInfo aclBetaInfo =
37  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
38  const arm_compute::TensorInfo aclGammaInfo =
39  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
40 
41  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42  activationDescriptor);
43 
44  return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
45  &aclOutputInfo,
46  &aclMeanInfo,
47  &aclVarInfo,
48  &aclBetaInfo,
49  &aclGammaInfo,
50  descriptor.m_Eps,
51  activationInfo);
52 }

Referenced by ClLayerSupport::IsBatchNormalizationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClBatchToSpaceNdWorkloadValidate()

arm_compute::Status ClBatchToSpaceNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor 
)

Definition at line 17 of file ClBatchToSpaceNdWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  arm_compute::Status statusBatchToSpace = arm_compute::Status(arm_compute::ErrorCode::OK);
25  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
26  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
27 
28  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
29  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
30 
31  // When a spacial dimension is missing (rank=3) set W to 1
32  const unsigned int rank = input.GetNumDimensions();
33  if (rank == 3)
34  {
35  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
36  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
37 
38  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
39  {
40  // In ACL dimensions are right to left: C, W, H, N
41  aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
42  aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
43  }
44  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
45  {
46  // In ACL dimensions are right to left: W, H, C, N
47  aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
48  aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
49  }
50  else
51  {
52  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
53  }
54 
55  statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
56  statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
57  }
58 
59  // ArmNN blockShape is [H, W] ACl asks for W, H
60  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
61  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
62 
63  const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
64 
65  statusBatchToSpace = arm_compute::CLBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
66  blockWidth,
67  blockHeight,
68  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
69  cropInfo);
70 
71  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
72  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
73  statusBatchToSpace.error_code() == arm_compute::ErrorCode::OK)
74  {
75  return arm_compute::Status(arm_compute::ErrorCode::OK,
76  "All BatchToSpace layers validate status OK.");
77  }
78  else
79  {
80  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
81  "BatchToSpace layer validate status failed."
82  + statusBatchToSpace.error_description()
83  + statusReshapeInput.error_description()
84  + statusReshapeOutput.error_description());
85  }
86 }

Referenced by ClLayerSupport::IsBatchToSpaceNdSupported().
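A sketch of a rank-4 NHWC call. Note from the code above that ArmNN orders the block shape as [H, W] while ACL takes W and H separately, and that rank-3 inputs are padded with a W dimension of 1. The include path and wrapper name are assumptions.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include "ClBatchToSpaceNdWorkload.hpp"   // assumed include path

arm_compute::Status ValidateBatchToSpace()
{
    // Batch 4 with block [2, 2] folds into the spatial dims: [4, 2, 2, 1] -> [1, 4, 4, 1] (NHWC).
    const armnn::TensorInfo input(armnn::TensorShape({4, 2, 2, 1}), armnn::DataType::Float32);
    const armnn::TensorInfo output(armnn::TensorShape({1, 4, 4, 1}), armnn::DataType::Float32);

    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = {2, 2};                 // [H, W]
    descriptor.m_Crops      = {{0, 0}, {0, 0}};
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    return armnn::ClBatchToSpaceNdWorkloadValidate(input, output, descriptor);
}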

◆ ClCastValidate()

arm_compute::Status ClCastValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 20 of file ClCastWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::CLCast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
26 }

Referenced by ClLayerSupport::IsCastSupported().

◆ ClChannelShuffleValidate()

arm_compute::Status ClChannelShuffleValidate ( const TensorInfo input,
const TensorInfo output,
const ChannelShuffleDescriptor descriptor 
)

Definition at line 20 of file ClChannelShuffleWorkload.cpp.

23 {
24  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
28  // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
29  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
30  arm_compute::DataLayout aclDataLayout;
31  if (input.GetNumDimensions() == 4)
32  {
33  switch (descriptor.m_Axis)
34  {
35  case 1:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
37  break;
38  case 3:
39  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
40  break;
41  default:
42  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
43  }
44  aclInputInfo.set_data_layout(aclDataLayout);
45  aclOutputInfo.set_data_layout(aclDataLayout);
46  return arm_compute::CLChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
47  }
48  else
49  {
50  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
51  }
52 }

Referenced by ClLayerSupport::IsChannelShuffleSupported().
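A sketch for the NHWC case handled above (axis 3), shuffling a 4D tensor's channels in groups. The include path and wrapper name are assumptions.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include "ClChannelShuffleWorkload.hpp"   // assumed include path

arm_compute::Status ValidateChannelShuffle()
{
    // NHWC tensor with 8 channels shuffled in 2 groups; axis 3 selects the channel dimension.
    const armnn::TensorInfo input(armnn::TensorShape({1, 4, 4, 8}), armnn::DataType::Float32);
    const armnn::TensorInfo output(armnn::TensorShape({1, 4, 4, 8}), armnn::DataType::Float32);

    armnn::ChannelShuffleDescriptor descriptor;
    descriptor.m_NumGroups = 2;
    descriptor.m_Axis      = 3;

    return armnn::ClChannelShuffleValidate(input, output, descriptor);
}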

◆ ClComparisonWorkloadValidate()

arm_compute::Status ClComparisonWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ComparisonDescriptor descriptor 
)

Definition at line 24 of file ClComparisonWorkload.cpp.

28 {
29  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
30  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
32 
33  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
34 
35  const arm_compute::Status aclStatus = arm_compute::CLComparison::validate(&aclInput0Info,
36  &aclInput1Info,
37  &aclOutputInfo,
38  comparisonOperation);
39  return aclStatus;
40 }

Referenced by ClLayerSupport::IsComparisonSupported().

◆ ClConcatWorkloadValidate()

arm_compute::Status ClConcatWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor 
)

Definition at line 27 of file ClConcatWorkload.cpp.

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
38  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  size_t aclAxis = CalcAxis(descriptor);
45  return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
46 }

Referenced by ClLayerSupport::IsConcatSupported().

◆ ClConstantWorkloadValidate()

arm_compute::Status ClConstantWorkloadValidate ( const TensorInfo output)

Definition at line 18 of file ClConstantWorkload.cpp.

19 {
20  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  std::array<arm_compute::DataType,8> supportedTypes = {
23  arm_compute::DataType::F16,
24  arm_compute::DataType::F32,
25  arm_compute::DataType::QASYMM8,
26  arm_compute::DataType::QASYMM8_SIGNED,
27  arm_compute::DataType::QSYMM16,
28  arm_compute::DataType::QSYMM8,
29  arm_compute::DataType::QSYMM8_PER_CHANNEL,
30  arm_compute::DataType::S32
31  };
32  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
33 
34  if (it != end(supportedTypes))
35  {
36  return arm_compute::Status{};
37  }
38  else
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
41  }
42 }

Referenced by ClLayerSupport::IsConstantSupported().

◆ ClContextBufferHasIdentifier()

bool armnn::ClContextBufferHasIdentifier ( const void *  buf)
inline

Definition at line 152 of file ClContextSchema_generated.h.

152  {
153  return flatbuffers::BufferHasIdentifier(
154  buf, ClContextIdentifier());
155 }

References ClContextIdentifier().

◆ ClContextExtension()

const char* armnn::ClContextExtension ( )
inline

Definition at line 167 of file ClContextSchema_generated.h.

167  {
168  return "armnn";
169 }

◆ ClContextIdentifier()

const char* armnn::ClContextIdentifier ( )
inline

◆ ClConvertFp16ToFp32WorkloadValidate()

arm_compute::Status ClConvertFp16ToFp32WorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 44 of file ClConvertFp16ToFp32Workload.cpp.

45 {
46  if (input.GetDataType() != DataType::Float16)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
49  }
50  if (output.GetDataType() != DataType::Float32)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp16ToFp32Supported().
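A sketch showing the data-type checks above: the input must be Float16 and the output Float32, with matching shapes. The include path and wrapper name are assumptions.

#include <armnn/Tensor.hpp>
#include "ClConvertFp16ToFp32Workload.hpp"   // assumed include path

arm_compute::Status ValidateFp16ToFp32()
{
    const armnn::TensorInfo input(armnn::TensorShape({1, 16, 16, 3}), armnn::DataType::Float16);
    const armnn::TensorInfo output(armnn::TensorShape({1, 16, 16, 3}), armnn::DataType::Float32);

    // Swapping the two data types would return RUNTIME_ERROR with the
    // "Input should be Float16" / "Output should be Float32" reasons above.
    return armnn::ClConvertFp16ToFp32WorkloadValidate(input, output);
}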

◆ ClConvertFp32ToFp16WorkloadValidate()

arm_compute::Status ClConvertFp32ToFp16WorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 44 of file ClConvertFp32ToFp16Workload.cpp.

45 {
46  if (input.GetDataType() != DataType::Float32)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
49  }
50  if (output.GetDataType() != DataType::Float16)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp32ToFp16Supported().

◆ ClConvolution2dWorkloadValidate()

arm_compute::Status ClConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 23 of file ClConvolution2dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
35 
36  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
37  descriptor.m_DilationY);
38 
39  arm_compute::TensorInfo aclBiasesInfo;
40  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
41 
42  if (descriptor.m_BiasEnabled)
43  {
44  if (!biases.has_value())
45  {
46  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
47  "ArmNN ClConvolution2dWorkload has empty bias value."};
48  }
49  // There's currently a problem with non const bias, so we'll explicitly block it here.
50  if (!biases.value().IsConstant())
51  {
52  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
53  "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
54  }
55  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
56  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
57  optionalAclBiasesInfo = &aclBiasesInfo;
58  }
59 
60  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
61 
62  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
63  activationDescriptor);
64 
65  return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
66  &aclWeightsInfo,
67  optionalAclBiasesInfo,
68  &aclOutputInfo,
69  layerInfo,
70  arm_compute::WeightsInfo(),
71  aclDilationInfo,
72  activationInfo,
73  isFastMathEnabled);
74 }

Referenced by ClLayerSupport::IsConvolution2dSupported(), and ClBackend::OptimizeSubgraphView().
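A sketch of a 3x3 same-padding convolution with a constant bias; the bias is wrapped in armnn::Optional and, per the check above, must be constant. The include path, the assumption that the fused-activation argument is a nullable pointer, and the wrapper name are illustrative.

#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include "ClConvolution2dWorkload.hpp"   // assumed include path

arm_compute::Status ValidateConv2d()
{
    // NHWC: 16 filters of 3x3 over 3 channels, stride 1, pad 1 keeps the spatial size at 32x32.
    const armnn::TensorInfo input(armnn::TensorShape({1, 32, 32, 3}),  armnn::DataType::Float32);
    const armnn::TensorInfo output(armnn::TensorShape({1, 32, 32, 16}), armnn::DataType::Float32);

    armnn::TensorInfo weights(armnn::TensorShape({16, 3, 3, 3}), armnn::DataType::Float32);
    weights.SetConstant(true);
    armnn::TensorInfo bias(armnn::TensorShape({16}), armnn::DataType::Float32);
    bias.SetConstant(true);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = descriptor.m_PadRight = descriptor.m_PadTop = descriptor.m_PadBottom = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    return armnn::ClConvolution2dWorkloadValidate(input, output, descriptor, weights,
                                                  armnn::Optional<armnn::TensorInfo>(bias),
                                                  /*isFastMathEnabled=*/false,
                                                  /*activationDescriptor=*/nullptr);
}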

◆ ClConvolution3dWorkloadValidate()

arm_compute::Status ClConvolution3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution3dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 23 of file ClConvolution3dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
33 
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  if (!biases.has_value())
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
41  "ArmNN ClConvolution3dWorkload has empty bias value."};
42  }
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
48 
49  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
50  isFastMathEnabled,
51  activationDescriptor);
52 
53  return arm_compute::CLConv3D::validate(&aclInputInfo,
54  &aclWeightsInfo,
55  optionalAclBiasesInfo,
56  &aclOutputInfo,
57  aclConv3DInfo);
58 }

Referenced by ClLayerSupport::IsConvolution3dSupported().

◆ ClDepthToSpaceWorkloadValidate()

arm_compute::Status ClDepthToSpaceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthToSpaceDescriptor descriptor 
)

Definition at line 22 of file ClDepthToSpaceWorkload.cpp.

25 {
26  DataLayout dataLayout = descriptor.m_DataLayout;
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
28 
29  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
30 
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
32 
33  const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo,
34  &aclOutputInfo,
35  blockSize);
36  return aclStatus;
37 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsDepthToSpaceSupported().

◆ ClDepthwiseConvolutionWorkloadValidate()

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor activationDescriptor 
)

Definition at line 26 of file ClDepthwiseConvolutionWorkload.cpp.

32 {
33  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
35 
36  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
37  //
38  // ACL format for weights for depthwise is:
39  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
40  // - [1, C, H, W] for [N, C, H, W] input/output layout
41  //
42  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
43  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
44  // so we do the permute here for the TensorInfo weights.
45  unsigned int aclDepthMultiplier;
46  TensorInfo weightsPermuted;
47  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
48 
49  // Convert the weights into the compute library format
50  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
51  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
52 
53  arm_compute::TensorInfo aclBiasesInfo;
54  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
55  if (descriptor.m_BiasEnabled)
56  {
57  if (!biases.has_value())
58  {
59  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
60  "ArmNN ClDepthwiseConv2dWorkload has empty bias value."};
61  }
62  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
63  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
64  optionalAclBiasesInfo = &aclBiasesInfo;
65  }
66 
67  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
68  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
69  descriptor.m_DilationX,
70  descriptor.m_DilationY);
71 
72  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
73  activationDescriptor);
74 
75  return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
76  &aclWeightsInfo,
77  optionalAclBiasesInfo,
78  &aclOutputInfo,
79  aclPadStrideInfo,
80  aclDepthMultiplier,
81  activationInfo,
82  aclDilationInfo);
83 
84 }

Referenced by ClLayerSupport::IsDepthwiseConvolutionSupported(), ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and ClBackend::OptimizeSubgraphView().
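A sketch showing the ArmNN weight layout described in the comments above: depthwise weights are always [1, H, W, Cout] regardless of the input layout, with Cout = Cin * depthMultiplier. The include path, the assumption that the fused-activation argument is a nullable pointer, and the wrapper name are illustrative.

#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include "ClDepthwiseConvolutionWorkload.hpp"   // assumed include path

arm_compute::Status ValidateDepthwiseConv2d()
{
    // NHWC input with 8 channels, depth multiplier 1 -> weights are [1, 3, 3, 8].
    const armnn::TensorInfo input(armnn::TensorShape({1, 16, 16, 8}), armnn::DataType::Float32);
    const armnn::TensorInfo output(armnn::TensorShape({1, 16, 16, 8}), armnn::DataType::Float32);

    armnn::TensorInfo weights(armnn::TensorShape({1, 3, 3, 8}), armnn::DataType::Float32);
    weights.SetConstant(true);

    armnn::DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = descriptor.m_PadRight = descriptor.m_PadTop = descriptor.m_PadBottom = 1;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    return armnn::ClDepthwiseConvolutionWorkloadValidate(input, output, descriptor, weights,
                                                         armnn::EmptyOptional(),   // no bias
                                                         nullptr);                 // no fused activation
}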

◆ ClDequantizeWorkloadValidate()

arm_compute::Status ClDequantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file ClDequantizeWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo);
28 }

Referenced by ClLayerSupport::IsDequantizeSupported().

◆ ClDivisionWorkloadValidate()

arm_compute::Status ClDivisionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 18 of file ClDivisionWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
31 }

Referenced by ClLayerSupport::IsDivisionSupported(), ClLayerSupport::IsLayerSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClElementwiseBinaryValidate()

arm_compute::Status ClElementwiseBinaryValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ElementwiseBinaryDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 64 of file ClElementwiseBinaryWorkload.cpp.

69 {
70  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
71  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
72  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
73 
74  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
75  activationDescriptor);
76 
77  switch (descriptor.m_Operation)
78  {
79  case BinaryOperation::Power:
80  return arm_compute::CLElementwisePower::validate(&aclInput0Info,
81  &aclInput1Info,
82  &aclOutputInfo,
83  activationInfo);
84  case BinaryOperation::SqDiff:
85  return arm_compute::CLElementwiseSquaredDiff::validate(&aclInput0Info,
86  &aclInput1Info,
87  &aclOutputInfo,
88  activationInfo);
89  default:
90  throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
91  }
92 }

Referenced by ClLayerSupport::IsLayerSupported().

◆ ClExpWorkloadValidate()

arm_compute::Status ClExpWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClExpWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClFloorWorkloadValidate()

arm_compute::Status ClFloorWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 14 of file ClFloorFloatWorkload.cpp.

16 {
17  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
18  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
19 
20  return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
21 }

Referenced by ClLayerSupport::IsFloorSupported().

◆ ClFullyConnectedWorkloadValidate()

arm_compute::Status ClFullyConnectedWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 19 of file ClFullyConnectedWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
27  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
28  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
29  aclWeights.set_are_values_constant(weights.IsConstant());
30 
31  arm_compute::TensorInfo aclBiases;
32  arm_compute::TensorInfo* optionalAclBiases = nullptr;
33  if (descriptor.m_BiasEnabled)
34  {
35  ARMNN_ASSERT(biases.has_value());
36  aclBiases = BuildArmComputeTensorInfo(biases.value());
37  aclBiases.set_are_values_constant(biases.value().IsConstant());
38  optionalAclBiases = &aclBiases;
39  }
40 
41  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
42  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
43 
44  return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
45  &aclWeights,
46  optionalAclBiases,
47  &aclOutput,
48  fullyConnectedLayerInfo);
49 }

Referenced by ClLayerSupport::IsFullyConnectedSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClGatherNdWorkloadValidate()

arm_compute::Status ClGatherNdWorkloadValidate ( const TensorInfo paramsInfo,
const TensorInfo indicesInfo,
const TensorInfo outputInfo 
)

Validates the GatherNd decomposition stage by stage (Mul, ReduceSum, Gather, Reshape) and returns OK only if every stage is valid.

Definition at line 16 of file ClGatherNdWorkload.cpp.

19 {
20  // Calculate ND, K, W, C.
21  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
22 
23  /// Validate Mul
24  // Indices with shape { W, ND }
25  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
26  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
27  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
28 
29  // Flattened coefficients with shape { ND }
30  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
31  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
32  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
33 
34  // Output of Mul with shape { W, ND }
35  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
36 
37  auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
38  &aclFlattenedCoeffInfo,
39  &aclOutputMulInfo,
40  1.0f,
41  arm_compute::ConvertPolicy::WRAP,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  arm_compute::ActivationLayerInfo());
44 
45  /// Validate ReduceSum
46  // Flattened indices with shape { W }
47  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
48  flattenedIndices_Info.SetShape({ keyIndices["W"] });
49  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
50 
51  const std::vector<unsigned int> armnnReduceAxes(1, 1);
52  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
53  indices_W_ND_Info.GetNumDimensions(),
54  armnnReduceAxes);
55 
56  auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
57  &aclFlattenedIndicesInfo,
58  static_cast<unsigned int>(coords[0]),
59  arm_compute::ReductionOperation::SUM,
60  false);
61 
62  /// Validate Gather
63  // Params with shape { K, C }
64  armnn::TensorInfo params_K_C_Info = paramsInfo;
65  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
66  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
67 
68  // Output of gather with shape { W, C }
69  armnn::TensorInfo outputGather_Info = outputInfo;
70  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
71  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
72 
73  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
74  auto statusGather =
75  arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
76 
77  /// Validate Reshape
78  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
79 
80  auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
81 
82  /// Return OK if all the layers are valid
83  auto okCode = arm_compute::ErrorCode::OK;
84  if (statusMul.error_code() == okCode &&
85  statusReduceSum.error_code() == okCode &&
86  statusGather.error_code() == okCode &&
87  statusReshape.error_code() == okCode)
88  {
89  return arm_compute::Status(arm_compute::ErrorCode::OK,
90  "All GatherND layers validate status OK.");
91  }
92  else
93  {
94  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
95  "GatherND layer validate status failed.");
96  }
97 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by ClLayerSupport::IsGatherNdSupported().
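A sketch of the shapes involved; with params of shape [5, 5, 2] and rank-2 indices of shape [3, 2], the key indices work out to ND = 2, W = 3, K = 25, C = 2, and the GatherNd output is [3, 2]. The include path and wrapper name are assumptions.

#include <armnn/Tensor.hpp>
#include "ClGatherNdWorkload.hpp"   // assumed include path

arm_compute::Status ValidateGatherNd()
{
    // params [5, 5, 2], indices [3, 2] (three 2-D keys) -> output [3, 2].
    const armnn::TensorInfo params(armnn::TensorShape({5, 5, 2}), armnn::DataType::Float32);
    const armnn::TensorInfo indices(armnn::TensorShape({3, 2}), armnn::DataType::Signed32);
    const armnn::TensorInfo output(armnn::TensorShape({3, 2}), armnn::DataType::Float32);

    return armnn::ClGatherNdWorkloadValidate(params, indices, output);
}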

◆ ClGatherWorkloadValidate()

arm_compute::Status ClGatherWorkloadValidate ( const TensorInfo input,
const TensorInfo indices,
const TensorInfo output,
const GatherDescriptor descriptor 
)

Definition at line 15 of file ClGatherWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25 
26  return arm_compute::CLGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
27 }

Referenced by ClLayerSupport::IsGatherSupported().

◆ ClImportTensorHandleFactoryId()

constexpr const char* armnn::ClImportTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file ClImportTensorHandleFactory.hpp.

16 {
17  return "Arm/Cl/ImportTensorHandleFactory";
18 }

Referenced by ClImportTensorHandleFactory::GetIdStatic().

◆ ClInstanceNormalizationWorkloadValidate()

arm_compute::Status ClInstanceNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const InstanceNormalizationDescriptor descriptor 
)

Definition at line 18 of file ClInstanceNormalizationWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo,
26  &aclOutputInfo,
27  descriptor.m_Gamma,
28  descriptor.m_Beta,
29  descriptor.m_Eps);
30 }

Referenced by ClLayerSupport::IsInstanceNormalizationSupported().

◆ ClL2NormalizationWorkloadValidate()

arm_compute::Status ClL2NormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor 
)

Definition at line 17 of file ClL2NormalizationFloatWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
25 
26  return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
27 }

Referenced by ClLayerSupport::IsL2NormalizationSupported().

◆ ClLogicalAndWorkloadValidate()

arm_compute::Status ClLogicalAndWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 20 of file ClLogicalAndWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalAnd::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogicalNotWorkloadValidate()

arm_compute::Status ClLogicalNotWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 20 of file ClLogicalNotWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::CLLogicalNot::validate(&aclInputInfo,
27  &aclOutputInfo);
28  return aclStatus;
29 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLogicalOrWorkloadValidate()

arm_compute::Status ClLogicalOrWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 20 of file ClLogicalOrWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalOr::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

Referenced by ClLayerSupport::IsLogicalBinarySupported().

◆ ClLogSoftmaxWorkloadValidate()

arm_compute::Status ClLogSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 17 of file ClLogSoftmaxWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

Referenced by ClLayerSupport::IsLogSoftmaxSupported().

◆ ClLogWorkloadValidate()

arm_compute::Status ClLogWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClLogWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLLogLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClLstmFloatWorkloadValidate()

arm_compute::Status ClLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 244 of file ClLstmFloatWorkload.cpp.

249 {
250  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
251 
252  // The inputs and the outputs
253  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
254  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
255  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
256  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
257  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
258  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
259  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
260 
261  // Basic parameters
262  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
263  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
264  const arm_compute::TensorInfo aclInputToCellWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
266  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
268  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
270  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
272  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
274  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
275  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
276  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
277 
278  arm_compute::TensorInfo aclInputToInputWeightsInfo;
279  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
280  arm_compute::TensorInfo aclCellToInputWeightsInfo;
281  arm_compute::TensorInfo aclInputGateBiasInfo;
282  arm_compute::TensorInfo aclProjectionWeightsInfo;
283  arm_compute::TensorInfo aclProjectionBiasInfo;
284  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
285  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
286  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
287  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
288  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
289  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
290 
291  if (!descriptor.m_CifgEnabled)
292  {
293  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
294  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
295 
296  if (paramsInfo.m_CellToInputWeights != nullptr)
297  {
298  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
299  }
300  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
301  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
302  paramsInfo.m_CellToInputWeights != nullptr ?
303  &aclCellToInputWeightsInfo: nullptr,
304  &aclInputGateBiasInfo);
305  }
306 
307  if (descriptor.m_ProjectionEnabled)
308  {
309  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
310 
311  if (paramsInfo.m_ProjectionBias != nullptr)
312  {
313  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
314  }
315  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
316  paramsInfo.m_ProjectionBias != nullptr ?
317  &aclProjectionBiasInfo: nullptr);
318  }
319 
320  if (descriptor.m_PeepholeEnabled)
321  {
322  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
323  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
324  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
325  }
326 
327  float cell_threshold = descriptor.m_ClippingThresCell;
328  float projection_threshold = descriptor.m_ClippingThresProj;
329 
330  // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
331  arm_compute::ActivationLayerInfo activationLayerInfo =
332  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
333 
334  if (descriptor.m_LayerNormEnabled)
335  {
336  if (!descriptor.m_CifgEnabled)
337  {
338  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
339  }
340 
341  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
342 
343  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
344 
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
348  nullptr : &aclInputLayerNormWeightsInfo,
349  &aclForgetLayerNormWeightsInfo,
350  &aclCellLayerNormWeightsInfo,
351  &aclOutputLayerNormWeightsInfo);
352  }
353 
354  return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
355  &aclInputToCellWeightsInfo,
356  &aclInputToOutputWeightsInfo,
357  &aclRecurrentToForgetWeightsInfo,
358  &aclRecurrentToCellWeightsInfo,
359  &aclRecurrentToOutputWeightsInfo,
360  &aclForgetGateBiasInfo,
361  &aclCellBiasInfo,
362  &aclOutputGateBiasInfo,
363  &aclOutputStateInInfo, &aclCellStateInInfo,
364  &aclScratchBufferInfo, &aclOutputStateOutInfo,
365  &aclCellStateOutInfo, &aclOutputInfo,
366  lstm_params_info, activationLayerInfo,
367  cell_threshold, projection_threshold);
368 }

Referenced by ClLayerSupport::IsLstmSupported().

◆ ClMaximumWorkloadValidate()

arm_compute::Status ClMaximumWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 24 of file ClMaximumWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMaximumSupported().

◆ ClMeanValidate()

arm_compute::Status ClMeanValidate ( const TensorInfo input,
const TensorInfo output,
const MeanDescriptor descriptor 
)

Definition at line 17 of file ClMeanWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
25  input.GetNumDimensions(),
26  descriptor.m_Axis);
27 
28  return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
29 }

Referenced by ClLayerSupport::IsMeanSupported().

◆ ClMinimumWorkloadValidate()

arm_compute::Status ClMinimumWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 24 of file ClMinimumWorkload.cpp.

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

Referenced by ClLayerSupport::IsLayerSupported(), and ClLayerSupport::IsMinimumSupported().

◆ ClMultiplicationWorkloadValidate()

arm_compute::Status ClMultiplicationWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 18 of file ClMultiplicationWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
28  arm_compute::ConvertPolicy::SATURATE :
29  arm_compute::ConvertPolicy::WRAP;
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
35  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
36  // ignored for F32 tensors.
37  return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
38  &aclInput2,
39  &aclOutput,
40  1.0f,
41  convertPolicy,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  activationInfo);
44 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsMultiplicationSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClNegWorkloadValidate()

arm_compute::Status ClNegWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClNegWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClNormalizationWorkloadValidate()

arm_compute::Status ClNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const NormalizationDescriptor descriptor 
)

Definition at line 19 of file ClNormalizationFloatWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
27 
28  return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
29 }

Referenced by ClLayerSupport::IsNormalizationSupported().

◆ ClPadValidate()

arm_compute::Status ClPadValidate ( const TensorInfo input,
const TensorInfo output,
const PadDescriptor descriptor 
)

Definition at line 62 of file ClPadWorkload.cpp.

65 {
66  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
67  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
68 
69  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
70 
71  std::reverse_copy(std::begin(descriptor.m_PadList),
72  std::end(descriptor.m_PadList),
73  std::begin(reversed_PadList));
74 
75  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
76 
77  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
78  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
79  const arm_compute::Status aclStatus =
80  arm_compute::CLPadLayer::validate(&aclInputInfo,
81  &aclOutputInfo,
82  padList,
83  pixelValue,
84  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
85 
86  return aclStatus;
87 }

Referenced by ClLayerSupport::IsPadSupported().
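A sketch padding one element on each side of the spatial dimensions of an NHWC tensor; as noted in the code, the ArmNN pad list is reversed before being handed to ACL. The include path and wrapper name are assumptions.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include "ClPadWorkload.hpp"   // assumed include path

arm_compute::Status ValidatePad()
{
    // Pad H and W by 1 on each side: [1, 4, 4, 3] -> [1, 6, 6, 3].
    const armnn::TensorInfo input(armnn::TensorShape({1, 4, 4, 3}), armnn::DataType::Float32);
    const armnn::TensorInfo output(armnn::TensorShape({1, 6, 6, 3}), armnn::DataType::Float32);

    armnn::PadDescriptor descriptor;
    descriptor.m_PadList  = {{0, 0}, {1, 1}, {1, 1}, {0, 0}};   // one (before, after) pair per dimension
    descriptor.m_PadValue = 0.0f;

    return armnn::ClPadValidate(input, output, descriptor);
}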

◆ ClPermuteWorkloadValidate()

arm_compute::Status ClPermuteWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const PermuteDescriptor descriptor 
)

Definition at line 17 of file ClPermuteWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
27 }

Referenced by ClLayerSupport::IsPermuteSupported().

◆ ClPooling2dWorkloadValidate()

arm_compute::Status ClPooling2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Pooling2dDescriptor descriptor 
)

Definition at line 18 of file ClPooling2dWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
26 
27  return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28 }

Referenced by ClLayerSupport::IsPooling2dSupported().

◆ ClPooling3dWorkloadValidate()

arm_compute::Status ClPooling3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Pooling3dDescriptor descriptor 
)

Definition at line 18 of file ClPooling3dWorkload.cpp.

21  {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
26 
27  return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28  }

Referenced by ClLayerSupport::IsPooling3dSupported().

◆ ClPreluWorkloadValidate()

arm_compute::Status ClPreluWorkloadValidate ( const TensorInfo input,
const TensorInfo alpha,
const TensorInfo output 
)

Definition at line 16 of file ClPreluWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLPReluLayer::validate(&aclInput,
25  &aclAlpha,
26  &aclOutput);
27 }

Referenced by ClLayerSupport::IsPreluSupported().

◆ ClQLstmWorkloadValidate()

arm_compute::Status ClQLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo cellStateIn,
const TensorInfo outputStateIn,
const TensorInfo cellStateOut,
const TensorInfo outputStateOut,
const TensorInfo output,
const QLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 247 of file ClQLstmWorkload.cpp.

255 {
256  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
257 
258  // Input/Output tensor info
259  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
260  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
261  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
262 
263  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
264  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
265  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
266 
267  // Mandatory tensor info
268  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
270  const arm_compute::TensorInfo aclInputToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
272  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
274  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
276  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
278  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
280  const arm_compute::TensorInfo aclForgetGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
282  const arm_compute::TensorInfo aclCellBiasInfo
283  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
284  const arm_compute::TensorInfo aclOutputGateBiasInfo
285  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
286 
287  // Optional tensor info
288  arm_compute::TensorInfo aclInputToInputWeightsInfo;
289  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
290 
291  arm_compute::TensorInfo aclCellToInputWeightsInfo;
292  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
293  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
294 
295  arm_compute::TensorInfo aclInputGateBiasInfo;
296 
297  arm_compute::TensorInfo aclProjectionWeightsInfo;
298  arm_compute::TensorInfo aclProjectionBiasInfo;
299 
300  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
301  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
302  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
303  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
304 
305  // Create tensor info for optional params if they are enabled
306  if (descriptor.m_PeepholeEnabled)
307  {
308  if (!descriptor.m_CifgEnabled)
309  {
310  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
311  }
312 
313  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
314  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
315 
316  // Set peephole params info
317  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
318  &aclCellToOutputWeightsInfo);
319  }
320 
321  if (descriptor.m_ProjectionEnabled)
322  {
323  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
324 
325  if (paramsInfo.m_ProjectionBias != nullptr)
326  {
327  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
328  }
329 
330  // Set projection params info
331  aclParamsInfo.set_projection_params(
332  &aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
334  }
335 
336  if (descriptor.m_LayerNormEnabled)
337  {
338  if (!descriptor.m_CifgEnabled)
339  {
340  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
341  }
342 
343  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
344  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  // Set layer norm params info
348  aclParamsInfo.set_layer_normalization_params(
349  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
350  &aclForgetLayerNormWeightsInfo,
351  &aclCellLayerNormWeightsInfo,
352  &aclOutputLayerNormWeightsInfo);
353  }
354 
355  if (!descriptor.m_CifgEnabled)
356  {
357  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
358  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
359  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
360 
361  // Set CIFG params info
362  aclParamsInfo.set_cifg_params(
363  &aclInputToInputWeightsInfo,
364  &aclRecurrentToInputWeightsInfo,
365  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
366  &aclInputGateBiasInfo);
367  }
368 
369  // Set scalar descriptor params
370  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
371  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
372  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
373  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
374  descriptor.m_ForgetIntermediateScale,
375  descriptor.m_CellIntermediateScale,
376  descriptor.m_OutputIntermediateScale);
377 
378  // QLSTM CL validate
379  return arm_compute::CLQLSTMLayer::validate(&aclInputInfo,
380  &aclInputToForgetWeightsInfo,
381  &aclInputToCellWeightsInfo,
382  &aclInputToOutputWeightsInfo,
383  &aclRecurrentToForgetWeightsInfo,
384  &aclRecurrentToCellWeightsInfo,
385  &aclRecurrentToOutputWeightsInfo,
386  &aclForgetGateBiasInfo,
387  &aclCellBiasInfo,
388  &aclOutputGateBiasInfo,
389  &aclCellStateInInfo,
390  &aclOutputStateInInfo,
391  &aclCellStateOutInfo,
392  &aclOutputStateOutInfo,
393  &aclOutputInfo,
394  aclParamsInfo);
395 }

Referenced by ClLayerSupport::IsQLstmSupported().

◆ ClQuantizedLstmWorkloadValidate()

arm_compute::Status ClQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 18 of file ClQuantizedLstmWorkload.cpp.

22 {
23  // Inputs
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn);
26  const arm_compute::TensorInfo aclPreviousOutputInInfo = BuildArmComputeTensorInfo(previousOutputIn);
27 
28  // Outputs
29  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  // Basic parameters
33  const arm_compute::TensorInfo aclInputToInputWeightsInfo
34  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
35  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
36  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
37  const arm_compute::TensorInfo aclInputToCellWeightsInfo
38  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
39  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
40  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
41  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
42  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
43  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
44  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
45  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
46  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
47  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
48  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
49  const arm_compute::TensorInfo aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
50  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
51  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
52  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
53 
54  return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo,
55  &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo,
56  &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo,
57  &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo,
58  &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo,
59  &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo,
60  &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo,
61  &aclCellStateOutInfo, &aclOutputInfo);
62 }

Referenced by ClLayerSupport::IsQuantizedLstmSupported().

◆ ClQuantizeWorkloadValidate()

arm_compute::Status ClQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file ClQuantizeWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::CLQuantizationLayer::validate(&aclInputInfo,
29  &aclOutputInfo);
30 }

Referenced by ClLayerSupport::IsQuantizeSupported().

◆ ClReduceWorkloadValidate()

arm_compute::Status ClReduceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor 
)

Definition at line 18 of file ClReduceWorkload.cpp.

21 {
22  if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
23  {
24  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
28  input.GetNumDimensions(),
29  descriptor.m_vAxis);
30 
31  return arm_compute::CLReductionOperation::validate(&aclInputInfo,
32  &aclOutputInfo,
33  static_cast<unsigned int>(coords[0]),
34  ConvertReductionOperationToAcl(descriptor),
35  descriptor.m_KeepDims);
36  }
37  else
38  {
39  // Validate layer if there are multiple axes.
40  arm_compute::Status status;
41  IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
42  return status;
43  }
44 }

References ReduceDescriptor::m_vAxis.

Referenced by ClLayerSupport::IsReduceSupported().

◆ ClReshapeWorkloadValidate()

arm_compute::Status ClReshapeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 15 of file ClReshapeWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
22 }

Referenced by ClLayerSupport::IsReshapeSupported().

◆ ClResizeWorkloadValidate()

arm_compute::Status ClResizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor 
)

Definition at line 22 of file ClResizeWorkload.cpp.

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  return arm_compute::CLScale::validate(&aclInputInfo,
40  &aclOutputInfo,
41  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
42  arm_compute::BorderMode::REPLICATE,
43  arm_compute::PixelValue(0.f),
44  samplingPolicy,
45  true,
46  descriptor.m_AlignCorners));
47 }

Referenced by ClLayerSupport::IsResizeSupported().

◆ ClReverseV2WorkloadValidate()

arm_compute::Status ClReverseV2WorkloadValidate ( const TensorInfo input,
const TensorInfo axis,
const TensorInfo output 
)

Definition at line 16 of file ClReverseV2Workload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
25 }

Referenced by ClLayerSupport::IsReverseV2Supported().

◆ ClRsqrtWorkloadValidate()

arm_compute::Status ClRsqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClRsqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClSinWorkloadValidate()

arm_compute::Status ClSinWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClSinWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLSinLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClSliceWorkloadValidate()

arm_compute::Status ClSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SliceDescriptor descriptor 
)

Definition at line 18 of file ClSliceWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates starts;
26  arm_compute::Coordinates ends;
27 
28  std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size);
29 
30  return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
31 }

Referenced by ClLayerSupport::IsSliceSupported().

◆ ClSoftmaxWorkloadValidate()

arm_compute::Status ClSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor 
)

Definition at line 17 of file ClSoftmaxWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

Referenced by ClLayerSupport::IsSoftmaxSupported().

◆ ClSpaceToBatchNdWorkloadValidate()

arm_compute::Status ClSpaceToBatchNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor 
)

Definition at line 16 of file ClSpaceToBatchNdWorkload.cpp.

19 {
20  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
21  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
22 
23  arm_compute::Status statusSpaceToBatch = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
25  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
26 
27  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
28  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
29 
30  // When a spatial dimension is missing (rank=3) set W to 1
31  const unsigned int rank = input.GetNumDimensions();
32  if (rank == 3)
33  {
34  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
35  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
36 
37  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
38  {
39  // In ACL dimensions are right to left: C, W, H, N
40  aclInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
41  aclOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
42  }
43  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
44  {
45  // In ACL dimensions are right to left: W, H, C, N
46  aclInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
47  aclOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
48  }
49  else
50  {
51  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
52  }
53 
54  statusReshapeInput = arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
55  statusReshapeOutput = arm_compute::CLReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
56  }
57 
58  // ArmNN blockShape is [H, W]; ACL asks for W, H
59  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
60  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
61 
62  unsigned int padLeft = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
63  unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
64  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(padLeft,
65  descriptor.m_PadList[0].first);
66  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(padRight,
67  descriptor.m_PadList[0].second);
68 
69  const arm_compute::Status aclStatus = arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
70  blockWidth,
71  blockHeight,
72  paddingLeftTop,
73  paddingRightBottom,
74  &aclOutputInfo);
75 
76  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
77  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
78  statusSpaceToBatch.error_code() == arm_compute::ErrorCode::OK)
79  {
80  return arm_compute::Status(arm_compute::ErrorCode::OK,
81  "All SpaceToBatch layers validate status OK.");
82  }
83  else
84  {
85  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
86  "SpaceToBatch layer validate status failed."
87  + statusSpaceToBatch.error_description()
88  + statusReshapeInput.error_description()
89  + statusReshapeOutput.error_description());
90  }
91 }

Referenced by ClLayerSupport::IsSpaceToBatchNdSupported().

◆ ClSpaceToDepthWorkloadValidate()

arm_compute::Status ClSpaceToDepthWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor 
)

Definition at line 54 of file ClSpaceToDepthWorkload.cpp.

57 {
58  DataLayout dataLayout = descriptor.m_DataLayout;
59  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
60 
61  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
62 
63  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
64 
65  const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo,
66  &aclOutputInfo,
67  blockSize);
68  return aclStatus;
69 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsSpaceToDepthSupported().

◆ ClSplitterWorkloadValidate()

arm_compute::Status ClSplitterWorkloadValidate ( const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 31 of file ClSplitterWorkload.cpp.

34 {
35  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
36 
37  size_t numOutputs = outputs.size();
38 
39  std::vector<arm_compute::TensorInfo> aclOutputs;
40  aclOutputs.reserve(numOutputs);
41 
42  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
43  aclOutputPtr.reserve(numOutputs);
44 
45  for (size_t i = 0u; i < outputs.size(); ++i)
46  {
47  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
48  aclOutputPtr.emplace_back(&aclOutputs.back());
49  }
50 
51  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
52  return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
53 }

Referenced by ClLayerSupport::IsSplitterSupported().

◆ ClSqrtWorkloadValidate()

arm_compute::Status ClSqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file ClSqrtWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  ActivationDescriptor descriptor;
25  descriptor.m_Function = ActivationFunction::Sqrt;
26  const arm_compute::ActivationLayerInfo activationLayerInfo =
27  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
28 
29  return arm_compute::CLActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
30 }

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

◆ ClStackWorkloadValidate()

arm_compute::Status ClStackWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo output,
const StackDescriptor descriptor 
)

Definition at line 29 of file ClStackWorkload.cpp.

32 {
33  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
34  arm_compute::TensorInfo aclInputInfo;
35  for (const TensorInfo* input : inputs)
36  {
37  aclInputInfo = BuildArmComputeTensorInfo(*input);
38  aclInputPtrs.emplace_back(&aclInputInfo);
39  }
40  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
41 
42  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
43 
44  return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
45 }

Referenced by ClLayerSupport::IsStackSupported().

◆ ClStridedSliceWorkloadValidate()

arm_compute::Status ClStridedSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor 
)

Definition at line 27 of file ClStridedSliceWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33 
34  arm_compute::Coordinates starts;
35  arm_compute::Coordinates ends;
36  arm_compute::BiStrides strides;
37 
38  std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride);
39 
40  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
41  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
42  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
43  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
44 
45  return arm_compute::CLStridedSlice::validate(&aclInputInfo,
46  &aclOutputInfo,
47  starts,
48  ends,
49  strides,
50  begin_mask,
51  end_mask,
52  shrink_axis_mask);
53 }

Referenced by ClLayerSupport::IsStridedSliceSupported().

◆ ClSubtractionValidate()

arm_compute::Status ClSubtractionValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 46 of file ClSubtractionWorkload.cpp.

50 {
51  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
52  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
53  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
54 
55  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
56  activationDescriptor);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
59  &aclInput1Info,
60  &aclOutputInfo,
61  g_AclConvertPolicy,
62  activationInfo);
63 
64  return aclStatus;
65 }

Referenced by ClLayerSupport::IsLayerSupported(), ClLayerSupport::IsSubtractionSupported(), and ClBackend::OptimizeSubgraphView().

◆ ClTensorHandleFactoryId()

constexpr const char* armnn::ClTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file ClTensorHandleFactory.hpp.

16 {
17  return "Arm/Cl/TensorHandleFactory";
18 }

Referenced by ClTensorHandleFactory::GetIdStatic().

◆ ClTileWorkloadValidate()

arm_compute::Status ClTileWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TileDescriptor descriptor 
)

Definition at line 16 of file ClTileWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
22 
23  std::vector<uint32_t> aclMultiples = descriptor.m_Multiples;
24  std::reverse(aclMultiples.begin(),aclMultiples.end());
25 
26  return arm_compute::CLTile::validate(&aclInput, &aclOutput, aclMultiples);
27 }

Referenced by ClLayerSupport::IsTileSupported().

◆ ClTransposeConvolution2dWorkloadValidate()

arm_compute::Status ClTransposeConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file ClTransposeConvolution2dWorkload.cpp.

31 {
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35 
36  arm_compute::TensorInfo aclBiasesInfo;
37  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
38 
39  if (descriptor.m_BiasEnabled)
40  {
41  ARMNN_ASSERT(biases.has_value());
42 
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor);
48 
49  return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  padStrideInfo);
54 }

Referenced by ClLayerSupport::IsTransposeConvolution2dSupported().

◆ ClTransposeWorkloadValidate()

arm_compute::Status ClTransposeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeDescriptor descriptor 
)

Definition at line 17 of file ClTransposeWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
27 }

Referenced by ClLayerSupport::IsTransposeSupported().

◆ ClUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 508 of file ClUnidirectionalSequenceLstmFloatWorkload.cpp.

516 {
517  TensorShape inputLayerShape = input.GetShape();
518  TensorShape outputLayerShape = output.GetShape();
519 
520  if (inputLayerShape.GetNumDimensions() != 3)
521  {
522  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
523  "Unidirectional Sequence LSTM layer validate status failed.");
524  }
525 
526  unsigned int maxTime = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
527  unsigned int batchSize = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
528  unsigned int inputSize = inputLayerShape[2];
529  unsigned int outputSize = outputLayerShape[2];
530 
531  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
532  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
533 
534  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "Permute1 status");
536  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Split status");
538  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "LSTM status");
540  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
541  "Concat status");
542  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
543  "Permute2 status");
544 
545  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
546  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
547 
548  //
549  // Permute validate
550  //
551  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
552  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
553  if (!descriptor.m_TimeMajor)
554  {
555  statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
556  &aclPermuteOutInfo,
557  arm_compute::PermutationVector(0U, 2U, 1U));
558  }
559 
560  //
561  // Split and Concat Tensors validate
562  //
563  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
564  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
565  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
566  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
567  splitterOutputsTensorInfos.reserve(maxTime);
568  concatInputsTensorInfos.reserve(maxTime);
569  for (unsigned int i = 0; i < maxTime; ++i)
570  {
571  arm_compute::TensorInfo splitter_out;
572  arm_compute::TensorInfo concat_in;
573 
574  auto splitterTensorInfo = TensorInfo(input);
575  auto concatTensorInfo = TensorInfo(output);
576  splitterTensorInfo.SetShape({batchSize, inputSize});
577  concatTensorInfo.SetShape({batchSize, outputSize});
578 
579  arm_compute::TensorInfo aclSplitterTensorInfo
580  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
581  arm_compute::TensorInfo aclConcatTensorInfo
582  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
583 
584  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
585  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
586  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
587  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
588  }
589 
590  //
591  // Split validate
592  //
593  unsigned int numberDimensions = 3;
594  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
595  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
596 
597  if (maxTime != 1) // ACL split does not work with only one element to split.
598  {
599  if (!descriptor.m_TimeMajor)
600  {
601  statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
602  splitterOutputsTensorInfosPtr,
603  aclAxisSplit);
604  }
605  else
606  {
607  statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
608  }
609  }
610 
611  //
612  // LSTM validate
613  //
614 
615  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
616 
617  unsigned int numUnits = cellStateIn.GetShape()[1];
618  unsigned int scratchBufferFactor = 4;
619 
620  if (descriptor.m_CifgEnabled)
621  {
622  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
623  scratchBufferFactor = 3;
624  }
625 
626  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
627 
628  // The inputs and outputs
629  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
630  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
631  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
632  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
633  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
634 
635  // Basic parameters
636  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
637  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
638  const arm_compute::TensorInfo aclInputToCellWeightsInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
640  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
641  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
642  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
643  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
644  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
645  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
646  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
647  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
648  const arm_compute::TensorInfo aclForgetGateBiasInfo
649  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
650  const arm_compute::TensorInfo aclCellBiasInfo
651  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
652  const arm_compute::TensorInfo aclOutputGateBiasInfo
653  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
654 
655  arm_compute::TensorInfo aclInputToInputWeightsInfo;
656  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
657  arm_compute::TensorInfo aclCellToInputWeightsInfo;
658  arm_compute::TensorInfo aclInputGateBiasInfo;
659  arm_compute::TensorInfo aclProjectionWeightsInfo;
660  arm_compute::TensorInfo aclProjectionBiasInfo;
661  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
662  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
663 
664  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
665  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
666  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
667  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
668 
669 
670  if (!descriptor.m_CifgEnabled)
671  {
672  if (descriptor.m_PeepholeEnabled)
673  {
674  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
675  }
676  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
677  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
678  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
679 
680  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
681  &aclRecurrentToInputWeightsInfo,
682  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
683  &aclInputGateBiasInfo);
684  }
685 
686  if (descriptor.m_ProjectionEnabled)
687  {
688  if (paramsInfo.m_ProjectionBias != nullptr)
689  {
690  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
691  }
692  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
693 
694  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
695  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
696  }
697 
698  if (descriptor.m_PeepholeEnabled)
699  {
700  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
701  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
702 
703  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
704  }
705 
706  if (descriptor.m_LayerNormEnabled)
707  {
708  if (!descriptor.m_CifgEnabled)
709  {
710  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
711  }
712  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
713  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
714  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
715 
716  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
717  &aclInputLayerNormWeightsInfo,
718  &aclForgetLayerNormWeightsInfo,
719  &aclCellLayerNormWeightsInfo,
720  &aclOutputLayerNormWeightsInfo);
721  }
722 
723  // Needs to be set to a negative threshold to be compatible with ACL
724  float cell_threshold = descriptor.m_ClippingThresCell;
725  float projection_threshold = descriptor.m_ClippingThresProj;
726 
727  arm_compute::ActivationLayerInfo activationLayerInfo =
728  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
729 
730  for (unsigned int i = 0; i != maxTime; ++i)
731  {
732 
733  // Set LSTM input and output ITensors depending on:
734  // input format (timeMajor) & number of LSTM batches (maxTime).
735  arm_compute::ITensorInfo* outputLSTM;
736  arm_compute::ITensorInfo* inputLSTM;
737  // If there is only one LSTM time major batch, we will not concat OR permute.
738  // Set input of LSTM to be first input ITensor.
739  // Set output of LSTM to be final output ITensor.
740  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
741  if (maxTime == 1 && descriptor.m_TimeMajor)
742  {
743  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
744  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
745  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
746  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
747  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
748  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
749  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
750  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
751  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
752  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
753  }
754  // If there is only one LSTM batch major batch, we will not concat, only permute.
755  // Set input of LSTM to be output of initial permute.
756  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
757  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
758  else if (maxTime == 1 && !descriptor.m_TimeMajor)
759  {
760  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
761  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
762  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
763  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
764  inputLSTM = &aclPermuteOutInfo;
765  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
766  }
767  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
768  else
769  {
770  inputLSTM = splitterOutputsTensorInfosPtr[i];
771  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
772  }
773 
774  statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
775  &aclInputToForgetWeightsInfo,
776  &aclInputToCellWeightsInfo,
777  &aclInputToOutputWeightsInfo,
778  &aclRecurrentToForgetWeightsInfo,
779  &aclRecurrentToCellWeightsInfo,
780  &aclRecurrentToOutputWeightsInfo,
781  &aclForgetGateBiasInfo,
782  &aclCellBiasInfo,
783  &aclOutputGateBiasInfo,
784  &aclOutputStateInInfo,
785  &aclCellStateInInfo,
786  &aclScratchBufferInfo,
787  &aclOutputStateOutInfo,
788  &aclCellStateOutInfo,
789  outputLSTM,
790  lstm_params_info,
791  activationLayerInfo,
792  cell_threshold,
793  projection_threshold);
794 
795  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
796  {
797  break;
798  }
799  }
800 
801  //
802  // Concat validate
803  //
804 
805  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
806  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
807  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
808  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
809 
810  TensorInfo concatOuputTensorInfo = TensorInfo(output);
811  concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
812  arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
813 
814  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
815  {
816  for (unsigned int i = 0; i < maxTime; ++i)
817  {
818  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
819  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
820  }
821 
822  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
823  if (!descriptor.m_TimeMajor)
824  {
825  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
826  &aclConcatOuputTensorInfo,
827  aclAxisConcat);
828  }
829  else
830  {
831  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
832  &aclOutputInfo,
833  aclAxisConcat);
834  }
835  }
836  // If only one LSTM batch, we do not concat and/or permute.
837  // Must ensure final output info is expanded to correct batch major dimensions.
838  else
839  {
840  if (!descriptor.m_TimeMajor)
841  {
842  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
843  BuildArmComputeTensorShape(shapeExpandBatchMajor));
844  }
845  else
846  {
847  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
848  BuildArmComputeTensorShape(shapeExpandTimeMajor));
849  }
850  }
851  //
852  // Permute validate
853  //
854  if (!descriptor.m_TimeMajor)
855  {
856  // Output now time major. Permute output back to batch major.
857  if (maxTime != 1)
858  {
859  statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
860  &aclOutputInfo,
861  arm_compute::PermutationVector(0U, 2U, 1U));
862  }
863  else
864  {
865  statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
866  &aclOutputInfo,
867  arm_compute::PermutationVector(0U, 2U, 1U));
868  }
869  }
870 
871  auto okCode = arm_compute::ErrorCode::OK;
872  if (statusPermute1.error_code() == okCode &&
873  statusSplit.error_code() == okCode &&
874  statusLSTM .error_code() == okCode &&
875  statusConcat.error_code() == okCode &&
876  statusPermute2.error_code() == okCode)
877  {
878  return arm_compute::Status(arm_compute::ErrorCode::OK,
879  "All Unidirectional Sequence LSTM layer validate status OK.");
880  }
881  else
882  {
883  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
884  "Unidirectional Sequence LSTM layer validate status failed.");
885  }
886 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by ClLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ CollapseLeadingUnitDimensions()

bool armnn::CollapseLeadingUnitDimensions ( const TensorInfo in,
TensorInfo out 
)
inline

Definition at line 14 of file NeonBackendOptimizationUtils.hpp.

15 {
16  unsigned int numDimensions = in.GetNumDimensions();
17  for (unsigned int i = 0; i < (numDimensions-1); ++i)
18  {
19  if (in.GetShape()[i] != 1)
20  {
21  return false;
22  }
23  }
24 
25  unsigned int w = in.GetShape()[numDimensions-1];
26  out = in;
27  out.SetShape({w});
28 
29  return true;
30 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), and TensorInfo::SetShape().

Referenced by BuildAddMulAddTensorInfoLists().
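
A short illustrative example (hypothetical values, not from the sources):

armnn::TensorInfo in({ 1, 1, 1, 8 }, armnn::DataType::Float32);
armnn::TensorInfo out;
bool collapsed = armnn::CollapseLeadingUnitDimensions(in, out);
// collapsed == true and out now describes a 1-D tensor of shape { 8 }.

armnn::TensorInfo in2({ 1, 2, 8 }, armnn::DataType::Float32);
bool collapsed2 = armnn::CollapseLeadingUnitDimensions(in2, out);
// collapsed2 == false because dimension 1 is not a unit dimension; out is left untouched.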

◆ Combine() [1/2]

MemorySourceFlags armnn::Combine ( Arg  source,
Args...  rest 
)

Definition at line 36 of file MemorySources.hpp.

37 {
38  return static_cast<MemorySourceFlags>(source) | Combine(rest...);
39 }

References Combine().

◆ Combine() [2/2]

MemorySourceFlags armnn::Combine ( Arg  sourceA,
Arg  sourceB 
)

Definition at line 30 of file MemorySources.hpp.

31 {
32  return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
33 }

Referenced by Combine().
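
A hedged usage sketch (assumes MemorySources.hpp; the CheckFlag helper is assumed to be declared alongside Combine):

using namespace armnn;

// Fold two or more MemorySource values into a single bitmask.
MemorySourceFlags flags = Combine(MemorySource::Malloc, MemorySource::DmaBuf);
MemorySourceFlags all   = Combine(MemorySource::Malloc, MemorySource::DmaBuf, MemorySource::DmaBufProtected);

// Individual bits can then be queried from the combined mask.
bool importsMalloc = CheckFlag(flags, MemorySource::Malloc); // true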

◆ ComputeAclAxis()

int armnn::ComputeAclAxis ( const int &  armnnAxis,
const armnn::TensorInfo tensor 
)
inline

Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)

Definition at line 273 of file ArmComputeUtils.hpp.

274 {
275  int rank = static_cast<int>(tensor.GetNumDimensions());
276 
277  ARMNN_ASSERT(rank != 0);
278  ARMNN_ASSERT((-1 * rank) <= armnnAxis);
279  ARMNN_ASSERT(armnnAxis < rank);
280 
281  int sign = (armnnAxis < 0) ? -1 : 1;
282  int aclAxis = sign * rank - 1 - armnnAxis;
283 
284  return aclAxis;
285 }

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

Referenced by ClGatherWorkload::ClGatherWorkload(), ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(), ClSoftmaxWorkload::ClSoftmaxWorkload(), NeonGatherWorkload::NeonGatherWorkload(), NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(), and NeonSoftmaxWorkload::NeonSoftmaxWorkload().
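
A short worked example (illustrative values only). For a rank-4 tensor the ArmNN axis, counted left to right, maps onto ACL's right-to-left numbering as aclAxis = sign * rank - 1 - armnnAxis:

//   armnnAxis =  0  ->  aclAxis =  4 - 1 - 0    =  3
//   armnnAxis =  3  ->  aclAxis =  4 - 1 - 3    =  0
//   armnnAxis = -1  ->  aclAxis = -4 - 1 - (-1) = -4   (equivalent to 0 for rank 4)

armnn::TensorInfo tensor({ 2, 3, 4, 5 }, armnn::DataType::Float32);
int aclAxis = armnn::ComputeAclAxis(1, tensor); // == 2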

◆ ComputeConv3DInfo() [1/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dDescriptor  descriptor,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)
inline

Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor.

Definition at line 288 of file ArmComputeUtils.hpp.

291 {
292  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
293  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
294  descriptor.m_PadTop, descriptor.m_PadBottom,
295  descriptor.m_PadFront, descriptor.m_PadBack};
296  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
297 
298  const arm_compute::ActivationLayerInfo activationInfo =
299  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
300  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
301 
302  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
303 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), Convolution3dDescriptor::m_DilationX, Convolution3dDescriptor::m_DilationY, Convolution3dDescriptor::m_DilationZ, Convolution3dDescriptor::m_PadBack, Convolution3dDescriptor::m_PadBottom, Convolution3dDescriptor::m_PadFront, Convolution3dDescriptor::m_PadLeft, Convolution3dDescriptor::m_PadRight, Convolution3dDescriptor::m_PadTop, Convolution3dDescriptor::m_StrideX, Convolution3dDescriptor::m_StrideY, and Convolution3dDescriptor::m_StrideZ.

◆ ComputeConv3DInfo() [2/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dQueueDescriptor  queueDescriptor,
bool  isFastMathEnabled 
)
inline

Definition at line 305 of file ArmComputeUtils.hpp.

307 {
308  auto descriptor = queueDescriptor.m_Parameters;
309  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
310  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
311  descriptor.m_PadTop, descriptor.m_PadBottom,
312  descriptor.m_PadFront, descriptor.m_PadBack};
313  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
314 
315  const arm_compute::ActivationLayerInfo activationInfo =
316  ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
317  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
318 
319  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
320 }

References ConvertAdditionalInfoToAclActivationLayerInfo(), QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Convolution3dDescriptor::m_StrideX.

◆ ComputeReductionTensorShape()

const TensorInfo armnn::ComputeReductionTensorShape ( const armnn::TensorInfo input,
const std::vector< uint32_t > &  vAxis,
const bool  keepDims 
)
inline

Function to compute the output tensor shape based on the axes and on whether keepDims is set.

Definition at line 347 of file ArmComputeUtils.hpp.

350 {
351  auto reducedTensorInfo = input;
352  unsigned int rank = reducedTensorInfo.GetNumDimensions();
353  unsigned int outputRank = 0;
354  // Calculate output dimension
355  if (keepDims)
356  {
357  outputRank = rank;
358  }
359  else if (vAxis.empty())
360  {
361  outputRank = 1;
362  }
363  else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
364  {
365  throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
366  }
367  else
368  {
369  outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
370  if (outputRank == 0)
371  {
372  outputRank = 1;
373  }
374  }
375  std::vector<unsigned int> dimSizes(outputRank, 1);
376  if (!vAxis.empty())
377  {
378  // Skip the dimension that has been reduced unless keepDims is true.
379  unsigned int outputIndex = 0;
380  for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
381  {
382  if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
383  {
384  dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
385  ++outputIndex;
386  }
387  else if (keepDims)
388  {
389  dimSizes[outputIndex] = 1;
390  ++outputIndex;
391  }
392  }
393  }
394  const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
395  reducedTensorInfo.SetShape(inferredShape);
396  return reducedTensorInfo;
397 }

References TensorInfo::GetNumDimensions().

Referenced by ChainReduceLayers().
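
A worked example may help (hypothetical shapes, not from the sources):

armnn::TensorInfo input({ 2, 3, 4 }, armnn::DataType::Float32);
std::vector<uint32_t> axes = { 1 };

// Reducing over axis 1 removes that dimension when keepDims is false...
armnn::TensorInfo reduced = armnn::ComputeReductionTensorShape(input, axes, false);
// reduced.GetShape() == TensorShape({ 2, 4 })

// ...and keeps it as a unit dimension when keepDims is true.
armnn::TensorInfo reducedKeep = armnn::ComputeReductionTensorShape(input, axes, true);
// reducedKeep.GetShape() == TensorShape({ 2, 1, 4 })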

◆ ComputeSoftmaxAclAxis()

T armnn::ComputeSoftmaxAclAxis ( const SoftmaxDescriptor softmaxDesc,
const armnn::TensorInfo tensor 
)
inline

Definition at line 227 of file ArmComputeUtils.hpp.

228 {
229  // Detect the Android default value of -1 and return the ACL default value of 0.
230  if (softmaxDesc.m_Axis == -1)
231  {
232  return 0;
233  }
234 
235  unsigned int dim = tensor.GetNumDimensions();
236 
237  ARMNN_ASSERT(dim != 0);
238 
239  // Currently ArmNN supports axis 1.
240  auto aclAxis = (static_cast<T>(dim) - 1);
241  aclAxis = aclAxis > 0 ? aclAxis -1 : aclAxis;
242 
243  return aclAxis;
244 }

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), and SoftmaxDescriptor::m_Axis.
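
A short worked example of the mapping (illustrative values only):

//   m_Axis == -1                ->  0 (the ACL default)
//   m_Axis != -1, dim == 4      ->  (4 - 1) - 1 = 2
//   m_Axis != -1, dim == 2      ->  (2 - 1) - 1 = 0
//   m_Axis != -1, dim == 1      ->  0 (1 - 1 is not > 0, so it is returned unchanged)

armnn::SoftmaxDescriptor desc;
desc.m_Axis = 1;
armnn::TensorInfo tensor({ 2, 10 }, armnn::DataType::Float32);
auto aclAxis = armnn::ComputeSoftmaxAclAxis<unsigned int>(desc, tensor); // == 0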

◆ ComputeSplitAxis()

std::set<unsigned int> armnn::ComputeSplitAxis ( const armnn::SplitterDescriptor desc,
const TensorShape input 
)
inline

Definition at line 246 of file ArmComputeUtils.hpp.

247 {
248  unsigned int numSplit = desc.GetNumViews();
249  unsigned int numDimensions = desc.GetNumDimensions();
250  std::set<unsigned int> splitAxis;
251 
252  if (desc.HasAxis())
253  {
254  splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
255  }
256  else
257  {
258  for (unsigned int i = 0; i < numSplit; ++i)
259  {
260  for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
261  {
262  if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
263  {
264  splitAxis.insert(dimIdx);
265  }
266  }
267  }
268  }
269  return splitAxis;
270 }

References ViewsDescriptor::GetAxis(), ViewsDescriptor::GetNumDimensions(), ViewsDescriptor::GetNumViews(), armnnUtils::GetUnsignedAxis(), ViewsDescriptor::GetViewSizes(), and ViewsDescriptor::HasAxis().

Referenced by ClSplitterWorkload::ClSplitterWorkload(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), and NeonSplitterWorkload::NeonSplitterWorkload().
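
An illustrative sketch (hypothetical view sizes; assumes ViewsDescriptor::SetViewSize as declared in Descriptors.hpp). Splitting a [2, 4, 6] tensor into two [2, 2, 6] views differs from the input only in dimension 1, so that dimension is reported as the split axis:

armnn::TensorShape inputShape({ 2, 4, 6 });
armnn::SplitterDescriptor desc(/*numViews=*/2, /*numDimensions=*/3);
for (unsigned int view = 0; view < 2; ++view)
{
    desc.SetViewSize(view, 0, 2);
    desc.SetViewSize(view, 1, 2);
    desc.SetViewSize(view, 2, 6);
}

std::set<unsigned int> splitAxis = armnn::ComputeSplitAxis(desc, inputShape);
// splitAxis == { 1 }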

◆ Concatenate()

void Concatenate ( const ConcatQueueDescriptor data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 14 of file Concatenate.cpp.

17 {
18  const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
19 
20  std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
21  Encoder<float>& encoder = *encoderPtr;
22 
23  for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = outputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i < outputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= outputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
43  ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i < inputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  std::unique_ptr<Decoder<float>> decoderPtr =
62  MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
63  Decoder<float>& decoder = *decoderPtr;
64  unsigned int inIndex = 0;
65  unsigned int dimensionStride = 1;
66 
67  for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
68  {
69  inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
70  dimensionStride *= inputInfo.GetShape()[i];
71  }
72  decoder += inIndex;
73  encoder.Set(decoder.Get());
74 
75  //What should we do if input views overlap on the output tensor?
76  //We could error, take the average, or something else...
77  //For now just stop after finding first view (input) that matches.
78  break;
79  }
80  }
81  ++encoder;
82  }
83 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), ConcatQueueDescriptor::ViewOrigin::m_Origin, ConcatQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().

◆ ConditionalThrow() [1/2]

void armnn::ConditionalThrow ( bool  condition)

Definition at line 174 of file Exceptions.hpp.

175 {
176  if (!condition)
177  {
178  throw ExceptionType();
179  }
180 }

◆ ConditionalThrow() [2/2]

void armnn::ConditionalThrow ( bool  condition,
const std::string &  message 
)

Definition at line 165 of file Exceptions.hpp.

166 {
167  if (!condition)
168  {
169  throw ExceptionType(message);
170  }
171 }

◆ ConditionalThrowIfNotEqual()

void armnn::ConditionalThrowIfNotEqual ( const std::string &  message,
const ComparedType &  leftHandSide,
const ComparedType &  rightHandSide 
)

ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&).

Definition at line 189 of file Exceptions.hpp.

192 {
193  if (!(leftHandSide == rightHandSide))
194  {
195  std::stringstream ss;
196  ss << message << " : " << leftHandSide << " != " << rightHandSide;
197  throw ExceptionType(ss.str());
198  }
199 }
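
A hedged usage sketch (the exception type and compared values are hypothetical):

armnn::TensorInfo tensorInfo({ 1, 2, 3, 4 }, armnn::DataType::Float32);
unsigned int expected = 4;
unsigned int actual   = tensorInfo.GetNumDimensions();

// Throws InvalidArgumentException with a "Tensor rank mismatch : 4 != <actual>" message if they differ.
armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>("Tensor rank mismatch", expected, actual);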

◆ ConfigureDetailsObject()

void armnn::ConfigureDetailsObject ( JsonChildObject detailsObject,
std::string  layerDetailsStr 
)

Definition at line 295 of file Profiling.cpp.

297 {
298  detailsObject.SetType(JsonObjectType::ExecObjectDesc);
299  detailsObject.SetAndParseDetails(layerDetailsStr);
300 
301 }

References ExecObjectDesc, JsonChildObject::SetAndParseDetails(), and JsonChildObject::SetType().

◆ ConfigureLogging()

void ConfigureLogging ( bool  printToStandardOutput,
bool  printToDebugOutput,
LogSeverity  severity 
)

Configures the logging behaviour of the ARMNN library.

printToStandardOutput: Set to true if log messages should be printed to the standard output.
printToDebugOutput: Set to true if log messages should be printed to a platform-specific debug output (where supported).
severity: All log messages at this severity level or higher will be printed; others will be ignored.

Examples
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, and SimpleSample.cpp.

Definition at line 18 of file Utils.cpp.

19 {
20  SetAllLoggingSinks(printToStandardOutput, printToDebugOutput, false);
21  SetLogFilter(severity);
22 }

References SetAllLoggingSinks(), and SetLogFilter().

Referenced by ArmnnDevice::ArmnnDevice(), ConfigureLoggingTest(), and main().
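
A typical call at application start-up, as in the samples listed above (the severity value is chosen for illustration):

armnn::ConfigureLogging(true,                        // print to standard output
                        false,                       // do not print to the platform debug output
                        armnn::LogSeverity::Info);   // show Info and above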

◆ ConfigureTuner()

void armnn::ConfigureTuner ( arm_compute::CLTuner &  tuner,
TuningLevel  level 
)
inline

Definition at line 44 of file ArmComputeTuningUtils.hpp.

45 {
46  tuner.set_tune_new_kernels(true); // Turn on tuning initially.
47 
48  switch (level)
49  {
50  case TuningLevel::Rapid:
51  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
52  tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
53  break;
54  case TuningLevel::Normal:
55  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
56  tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
57  break;
58  case TuningLevel::Exhaustive:
59  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
60  tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
61  break;
62  case TuningLevel::None:
63  default:
64  tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
65  break;
66  }
67 }

References ARMNN_LOG, Exhaustive, info, None, Normal, and Rapid.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().
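
A hedged sketch of how a backend context might drive this helper; ConfigureTuner and TuningLevel live in the internal ArmComputeTuningUtils.hpp header (include path assumed), and CLTuner is Arm Compute Library's tuner class:

#include <arm_compute/runtime/CL/CLTuner.h>

// Illustrative only: backend contexts normally own the tuner and pick the level
// from the backend options rather than hard-coding it as done here.
void SetUpGpuTuning(arm_compute::CLTuner& tuner)
{
    armnn::ConfigureTuner(tuner, armnn::TuningLevel::Normal);
}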

◆ ConnectedToLayerType()

bool armnn::ConnectedToLayerType ( Layer baseLayer,
LayerType  layerType,
unsigned int  dimSize = 0 
)
inline

Checks the Layer's Connections to see if it's connected to a Layer with the provided layerType.

If dimSize is provided, it will also check whether the connecting Tensor has more than that number of dimensions.

Definition at line 271 of file SubgraphUtils.hpp.

272 {
273  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
274  TensorInfo parentTensorInfo = baseLayer->GetInputSlot(0).GetTensorInfo();
275 
276  if (parentTensorInfo.GetNumDimensions() > dimSize && parentLayer.GetType() == layerType)
277  {
278  return true;
279  }
280  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
281  {
282  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
283  TensorInfo nextTensorInfo = baseLayer->GetOutputSlot(0).GetConnection(i)->GetTensorInfo();
284 
285  if (nextTensorInfo.GetNumDimensions() > dimSize && nextLayer.GetType() == layerType)
286  {
287  return true;
288  }
289  }
290  return false;
291 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), TensorInfo::GetNumDimensions(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), InputSlot::GetTensorInfo(), and Layer::GetType().

◆ ConnectedToLayerWithNCHW()

bool armnn::ConnectedToLayerWithNCHW ( Layer baseLayer)
inline

Checks if the Layer is connected to any Layer that has an NCHW layout.

Definition at line 250 of file SubgraphUtils.hpp.

251 {
252  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
253 
254  if (IsNCHW(parentLayer))
255  {
256  return true;
257  }
258  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
259  {
260  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
261  if (IsNCHW(nextLayer))
262  {
263  return true;
264  }
265  }
266  return false;
267 }

References InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), and IsNCHW().

Referenced by NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ Convert1HWOTensorInfoToAcl()

std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl ( const TensorInfo weightInfo,
const TensorInfo inputInfo,
const DataLayout  dataLayout 
)

Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier.

Definition at line 176 of file WorkloadUtils.cpp.

179 {
180  unsigned int aclDepthMultiplier = 1;
181  TensorInfo weightsPermuted;
182  if (dataLayout == armnn::DataLayout::NHWC)
183  {
184  // No permutation required. Input and weights data layouts are the same.
185  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
186  weightsPermuted = weightInfo;
187  }
188 
189  else if (dataLayout == armnn::DataLayout::NCHW)
190  {
191  // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
192  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
193  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
194  PermutationVector permutationVector{ 0, 2, 3, 1 };
195  weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
196  }
197  else
198  {
199  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
200  GetDataLayoutName(dataLayout)));
201  }
202 
203  return std::make_tuple(weightsPermuted, aclDepthMultiplier);
204 }

References GetDataLayoutName(), TensorInfo::GetShape(), NCHW, NHWC, and armnnUtils::Permuted().
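
A worked sketch for an NCHW network: with 8 input channels and [1,3,3,16] weights, the returned depth multiplier is 16/8 = 2 and the weights info is permuted to [1,16,3,3]. The function is declared in the backends' WorkloadUtils header (include path assumed):

#include <armnn/Tensor.hpp>
#include <tuple>

std::tuple<armnn::TensorInfo, unsigned int> PrepareDepthwiseWeightsInfo()
{
    armnn::TensorInfo inputInfo({ 1, 8, 32, 32 }, armnn::DataType::Float32);  // NCHW input
    armnn::TensorInfo weightInfo({ 1, 3, 3, 16 }, armnn::DataType::Float32);  // [1,H,W,I*M]

    // Returns a [1,16,3,3] TensorInfo and a depth multiplier of 2 for this layout.
    return armnn::Convert1HWOTensorInfoToAcl(weightInfo, inputInfo, armnn::DataLayout::NCHW);
}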

◆ Convert1HWOTensorToAcl()

std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout  dataLayout,
void *  permuteBuffer 
)

Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.

Parameters
weightTensor- ConstTensorHandle of weights tensor
inputInfo- TensorInfo of input tensor
dataLayout- DataLayout of the input tensor
permuteBuffer- Pointer to memory with the size of the tensor, used for the permutation
Returns
Tuple of the transformed weights (ConstTensor) and the depth multiplier

Definition at line 145 of file WorkloadUtils.cpp.

149 {
150  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
151  unsigned int depthMultiplier = 1;
152  PermutationVector permutationVector{};
153  if (dataLayout == armnn::DataLayout::NHWC)
154  {
155  // No permutation required. Data layouts are the same.
156 
157  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
158  }
159  else if (dataLayout == armnn::DataLayout::NCHW)
160  {
161  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
162  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
163  permutationVector = { 0, 2, 3, 1 };
164  }
165  else
166  {
167  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
168  GetDataLayoutName(dataLayout)));
169  }
170 
171  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
172 
173  return std::make_tuple(weightsPermuted, depthMultiplier);
174 }

References GetDataLayoutName(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, and PermuteTensor().

◆ Convert1HWOtoMIHW()

std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout dataLayout,
void *  permuteBuffer 
)

Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].

Parameters
weightTensor- ConstTensorHandle of the weight tensor that should be converted
inputInfo- TensorInfo of the corresponding input tensor
dataLayout- DataLayout of the input tensor e.g. NHWC or NCHW
permuteBuffer- Memory location with the same size as the weight tensor to write converted data to
Returns
- A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier

Definition at line 207 of file WorkloadUtils.cpp.

211 {
212  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
213 
214  if (weightsInfo.HasPerAxisQuantization())
215  {
216  throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
217  "quantization is applied.");
218  }
219 
220  // Reshape weights [ 1, H, W, I*M ] --> [ H, W, I, M ]
221  auto weightsShape = weightsInfo.GetShape();
222  auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
223  unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
224  weightsInfo.SetShape({ weightsShape[1],
225  weightsShape[2],
226  inputInfo.GetShape()[channelIndex],
227  depthMultiplier});
228 
229  // Permute [ H, W, I, M ] --> [ M, I, H, W ]
230  PermutationVector permutationVector = { 2, 3, 1, 0 };
231  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
232 
233  return std::make_tuple(weightsPermuted, depthMultiplier);
234 }

References DataLayoutIndexed::GetChannelsIndex(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), TensorInfo::HasPerAxisQuantization(), PermuteTensor(), and TensorInfo::SetShape().

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [1/2]

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor activationDescPtr)
inline

Definition at line 94 of file ArmComputeUtils.hpp.

95 {
96  if (activationDescPtr != nullptr)
97  {
98  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
99  *activationDescPtr));
100  }
101  return arm_compute::ActivationLayerInfo();
102 }

References ConvertActivationDescriptorToAclActivationLayerInfo().
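
A minimal sketch of this pointer overload: a null descriptor yields a default (disabled) ActivationLayerInfo, while a populated descriptor is forwarded to the by-value overload. The conversion function and the ACL ActivationLayerInfo type come from the internal ArmComputeUtils.hpp header (include path assumed):

#include <armnn/Descriptors.hpp>

arm_compute::ActivationLayerInfo MakeBoundedReluInfo()
{
    armnn::ActivationDescriptor desc;
    desc.m_Function = armnn::ActivationFunction::BoundedReLu;
    desc.m_A = 6.0f;  // upper bound used by BoundedReLu

    return armnn::ConvertActivationDescriptorToAclActivationLayerInfo(&desc);
}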

◆ ConvertActivationFunctionToAclActivationFunction()

arm_compute::ActivationLayerInfo::ActivationFunction armnn::ConvertActivationFunctionToAclActivationFunction ( ActivationFunction  armnnFunction)
inline

Definition at line 62 of file ArmComputeUtils.hpp.

63 {
64  using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
65 
66  switch (armnnFunction)
67  {
68  case ActivationFunction::Linear: return AclActivationFunction::LINEAR;
69  // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
70  case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC;
71  case ActivationFunction::ReLu: return AclActivationFunction::RELU;
72  case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU;
73  case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU;
74  case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU;
75  case ActivationFunction::Abs: return AclActivationFunction::ABS;
76  case ActivationFunction::Sqrt: return AclActivationFunction::SQRT;
77  case ActivationFunction::Square: return AclActivationFunction::SQUARE;
78  case ActivationFunction::TanH: return AclActivationFunction::TANH;
79  case ActivationFunction::Elu: return AclActivationFunction::ELU;
80  case ActivationFunction::HardSwish: return AclActivationFunction::HARD_SWISH;
81  case ActivationFunction::Gelu: return AclActivationFunction::GELU;
82  default: throw InvalidArgumentException("Unsupported activation function");
83  }
84 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by ConvertActivationDescriptorToAclActivationLayerInfo().

◆ ConvertAdditionalInfoToAclActivationLayerInfo()

◆ ConvertComparisonOperationToAcl()

arm_compute::ComparisonOperation armnn::ConvertComparisonOperationToAcl ( const ComparisonDescriptor descriptor)
inline

Definition at line 141 of file ArmComputeUtils.hpp.

142 {
143  switch (descriptor.m_Operation)
144  {
145  case ComparisonOperation::Greater: return arm_compute::ComparisonOperation::Greater;
146  case ComparisonOperation::GreaterOrEqual: return arm_compute::ComparisonOperation::GreaterEqual;
147  case ComparisonOperation::Less: return arm_compute::ComparisonOperation::Less;
148  case ComparisonOperation::LessOrEqual: return arm_compute::ComparisonOperation::LessEqual;
149  case ComparisonOperation::Equal: return arm_compute::ComparisonOperation::Equal;
150  case ComparisonOperation::NotEqual: return arm_compute::ComparisonOperation::NotEqual;
151  default: throw InvalidArgumentException("Unsupported comparison function");
152  }
153 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, ComparisonDescriptor::m_Operation, and NotEqual.

Referenced by ClComparisonWorkload::ClComparisonWorkload(), and NeonComparisonWorkload::NeonComparisonWorkload().

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [1/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
arm_compute::ActivationLayerInfo  activationLayerInfo 
)
inline

Definition at line 204 of file ArmComputeUtils.hpp.

206 {
207  arm_compute::FullyConnectedLayerInfo fc_info;
208  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
209  fc_info.activation_info = activationLayerInfo;
210  return fc_info;
211 }

References FullyConnectedDescriptor::m_TransposeWeightMatrix.

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [2/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
const ActivationDescriptor activationDesc 
)
inline

Definition at line 194 of file ArmComputeUtils.hpp.

196 {
197  arm_compute::FullyConnectedLayerInfo fc_info;
198  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
199  fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
200  return fc_info;
201 }

References ConvertActivationDescriptorToAclActivationLayerInfo(), and FullyConnectedDescriptor::m_TransposeWeightMatrix.

Referenced by ClFullyConnectedWorkload::ClFullyConnectedWorkload().

◆ ConvertLogSeverity()

constexpr LogSeverity armnn::ConvertLogSeverity ( BoostLogSeverityMapping  severity)
constexpr

Definition at line 206 of file Logging.hpp.

207 {
208  return static_cast<LogSeverity>(severity);
209 }

◆ ConvertLstmActivationFuncToAclLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertLstmActivationFuncToAclLayerInfo ( uint32_t  activationFunction)
inline

Definition at line 118 of file ArmComputeUtils.hpp.

119 {
120  // To prepare the ActivationLayerInfo object, five activation function cases need to be considered.
121  switch (activationFunction)
122  {
123  case 0:
124  return arm_compute::ActivationLayerInfo(); // no activation, do nothing
125  case 1:
126  return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
127  case 3:
128  return arm_compute::ActivationLayerInfo(
129  arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
130  case 4:
131  return arm_compute::ActivationLayerInfo(
132  arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
133  case 6:
134  return arm_compute::ActivationLayerInfo(
135  arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
136  default:
137  throw armnn::Exception("Wrong Type of Activation Function!");
138  }
139 }

◆ ConvertMaskToACLFormat()

int32_t ConvertMaskToACLFormat ( int32_t  mask,
int32_t  numDim 
)

Definition at line 298 of file WorkloadUtils.cpp.

299 {
300  int32_t reversedMask = 0;
301  for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
302  {
303  // Check if bit set in mask for each dimension
304  int32_t bit = (mask & 1 << i) != 0;
305  // Increment the new mask with the bits reversed
306  reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
307  }
308 
309  return reversedMask;
310 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload(), and NeonStridedSliceWorkload::NeonStridedSliceWorkload().
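
The helper mirrors the mask bits across the numDim dimensions: for numDim == 3, bit 0 swaps with bit 2 while bit 1 stays in place. A small worked sketch:

int32_t ExampleReverseMask()
{
    // Bit i of the ArmNN mask becomes bit (numDim - 1 - i) of the ACL mask.
    int32_t armnnMask = 0b011;  // dimensions 0 and 1 are masked
    int32_t aclMask = armnn::ConvertMaskToACLFormat(armnnMask, 3);
    // aclMask == 0b110
    return aclMask;
}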

◆ ConvertNormalizationAlgorithmChannelToAclNormType()

arm_compute::NormType armnn::ConvertNormalizationAlgorithmChannelToAclNormType ( NormalizationAlgorithmChannel  channelType)
inline

Definition at line 182 of file ArmComputeUtils.hpp.

183 {
184  using arm_compute::NormType;
185  switch (channelType)
186  {
187  case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
188  case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
189  default: throw InvalidArgumentException("Unsupported normalization algorithm channel type");
190  }
191 }

References Across, and Within.

◆ ConvertOutputShapeRoundingToAclDimensionRoundingType()

arm_compute::DimensionRoundingType armnn::ConvertOutputShapeRoundingToAclDimensionRoundingType ( OutputShapeRounding  rounding)
inline

Definition at line 168 of file ArmComputeUtils.hpp.

170 {
171  using arm_compute::DimensionRoundingType;
172 
173  switch (rounding)
174  {
175  case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL;
176  case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR;
177  default: throw InvalidArgumentException("Unsupported Output Shape Rounding type");
178  }
179 }

References Ceiling, and Floor.

◆ ConvertPaddingModeToAcl()

arm_compute::PaddingMode armnn::ConvertPaddingModeToAcl ( const PaddingMode paddingMode)
inline

Definition at line 322 of file ArmComputeUtils.hpp.

323 {
324  switch (paddingMode)
325  {
326  case PaddingMode::Constant: return arm_compute::PaddingMode::CONSTANT;
327  case PaddingMode::Reflect: return arm_compute::PaddingMode::REFLECT;
328  case PaddingMode::Symmetric: return arm_compute::PaddingMode::SYMMETRIC;
329  default: throw InvalidArgumentException("Unsupported Padding Mode");
330  }
331 }

References Constant, Reflect, and Symmetric.

◆ ConvertPoolingAlgorithmToAclPoolingType()

arm_compute::PoolingType armnn::ConvertPoolingAlgorithmToAclPoolingType ( PoolingAlgorithm  poolingAlgorithm)
inline

Definition at line 155 of file ArmComputeUtils.hpp.

156 {
157  using arm_compute::PoolingType;
158 
159  switch (poolingAlgorithm)
160  {
161  case PoolingAlgorithm::Max: return PoolingType::MAX;
162  case PoolingAlgorithm::Average: return PoolingType::AVG;
163  case PoolingAlgorithm::L2: return PoolingType::L2;
164  default: throw InvalidArgumentException("Unsupported pooling algorithm");
165  }
166 }

References Average, L2, and Max.

Referenced by CreatePool2dAttributes().

◆ ConvertReductionOperationToAcl()

arm_compute::ReductionOperation armnn::ConvertReductionOperationToAcl ( const ReduceDescriptor descriptor)
inline

Definition at line 333 of file ArmComputeUtils.hpp.

334 {
335  switch (descriptor.m_ReduceOperation)
336  {
337  case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM;
338  case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM;
339  case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX;
340  case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN;
341  case ReduceOperation::Prod: return arm_compute::ReductionOperation::PROD;
342  default: throw InvalidArgumentException("Unsupported Reduction operation");
343  }
344 }

References ReduceDescriptor::m_ReduceOperation, Max, Mean, Min, Prod, and Sum.

◆ ConvertResizeMethodToAclInterpolationPolicy()

arm_compute::InterpolationPolicy armnn::ConvertResizeMethodToAclInterpolationPolicy ( ResizeMethod  resizeMethod)
inline

Definition at line 213 of file ArmComputeUtils.hpp.

214 {
215  switch (resizeMethod)
216  {
217  case ResizeMethod::Bilinear:
218  return arm_compute::InterpolationPolicy::BILINEAR;
219  case ResizeMethod::NearestNeighbor:
220  return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
221  default:
222  throw InvalidArgumentException("Unsupported resize method");
223  }
224 }

References Bilinear, and NearestNeighbor.

◆ ConvertWeightTensorFromArmnnToAcl()

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl ( const ConstTensorHandle weightTensor,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 236 of file WorkloadUtils.cpp.

239 {
240  if (weightTensor == nullptr)
241  {
242  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
243  }
244  if (permuteBuffer == nullptr)
245  {
246  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
247  }
248 
249  auto multiplier = weightTensor->GetTensorInfo().GetShape()[0];
250  auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];
251 
252  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
253  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
254 
255  // 1. Permute the weights if necessary
256  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
257  // starting from the current shape of [ M, I, H, W ]
258  // If no permutation is necessary, leave the permutation vector empty
259  PermutationVector permutationVector{};
260  if (dataLayout == DataLayout::NHWC)
261  {
262  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
263  permutationVector = { 3, 2, 0, 1 };
264  }
265  ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
266 
267  // Shuffle the weights data to obtain the channel order needed by Acl
268  if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
269  {
270  switch (weightPermuted.GetDataType())
271  {
272  case DataType::Float32:
273  weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
274  break;
275  case DataType::Float16:
276  weightPermuted =
277  ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
278  break;
279  case DataType::QAsymmS8:
280  case DataType::QAsymmU8:
281  weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
282  break;
283  case DataType::QSymmS8:
284  weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
285  break;
286  default:
287  break;
288  }
289  }
290 
291  // 2. Reshape the weights
292  ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
293 
294  // 3. Return both the tensor and the allocated storage to ensure that the data stays alive
295  return weightPermuted;
296 }

References Float16, Float32, BaseTensor< MemoryType >::GetDataType(), BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, PermuteTensor(), QAsymmS8, QAsymmU8, QSymmS8, and ReshapeWeightsForAcl().

◆ ConvertWeightTensorInfoFromArmnnToAcl()

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl ( const TensorInfo weightInfo,
DataLayout  dataLayout 
)

Definition at line 121 of file WorkloadUtils.cpp.

122 {
123  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
124  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
125 
126  // 1. Permute the weights if necessary
127  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
128  // starting from the current shape of [ M, I, H, W ]
129  TensorInfo weightPermutedInfo(weightInfo);
130  if (dataLayout == DataLayout::NHWC)
131  {
132  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
133  PermutationVector permutationVector{ 3, 2, 0, 1 };
134  weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
135  }
136 
137  // 2. Reshape the weights
138  ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
139 
140  // 3. Return the permuted weight info
141  return weightPermutedInfo;
142 }

References NHWC, armnnUtils::Permuted(), and ReshapeWeightsForAcl().
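
A brief sketch of the NHWC path, following the comments above: [M,I,H,W] weights are permuted to [H,W,I,M] and then reshaped to [1,H,W,I*M]. The expected shape below is inferred from those comments, not independently verified:

#include <armnn/Tensor.hpp>

armnn::TensorInfo ExampleAclWeightInfo()
{
    // ArmNN depthwise weights are [M, I, H, W]; here M = 2, I = 3, H = W = 5.
    armnn::TensorInfo weightInfo({ 2, 3, 5, 5 }, armnn::DataType::Float32);

    // Expected result for NHWC: a [1, 5, 5, 6] TensorInfo, i.e. [1, H, W, I*M].
    return armnn::ConvertWeightTensorInfoFromArmnnToAcl(weightInfo, armnn::DataLayout::NHWC);
}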

◆ Convolve()

void Convolve ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  xDilation,
unsigned int  yDilation,
bool  depthwise 
)

Definition at line 71 of file ConvImpl.cpp.

87 {
88  if (biasEnabled && !pBiasDecoder)
89  {
90  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
91  }
92  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
93 
94  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
95  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
96  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
97 
98  // Weights layout:
99  // Conv2d: [O,H,W,I]
100  // Depthwise: [1,H,W,O]
101  const unsigned int inputChannels = rInputShape[channelsIndex];
102  const unsigned int outputChannels = rOutputShape[channelsIndex];
103  const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
104 
105  const unsigned int batchSize = rOutputShape[0];
106  const unsigned int outputHeight = rOutputShape[heightIndex];
107  const unsigned int outputWidth = rOutputShape[widthIndex];
108  const unsigned int inputHeight = rInputShape[heightIndex];
109  const unsigned int inputWidth = rInputShape[widthIndex];
110 
111  const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112  const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
113 
114  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
115  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
116 
117  const TensorShape biasShape{outputChannels};
118  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
119 
120  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
121  {
122  for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
123  {
124  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
125  {
126  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
127  {
128  // This loop goes over each output element.
129  float sum = 0.0f;
130 
131  // For depthwise, each output channel corresponds to exactly one input channel.
132  // For normal, must loop over each input channel.
133  for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
134  {
135  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
136  {
137  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
138  {
139  // This loop goes over each input element for each output element.
140  unsigned int filterIndex = 0;
141 
142  // Since the dimensionality of the kernel depends on whether the convolution is depthwise, so does the index.
143  if (depthwise)
144  {
145  cInput = cOutput / depthMultiplier;
146  // filterDepth = outputChannels;
147  filterIndex = xFilter * outputChannels + cOutput +
148  yFilter * filterWidth * outputChannels;
149  }
150  else
151  {
152  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
153  // performance regression.
154  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
155  {
156  filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157  yFilter * filterWidth * inputChannels +
158  xFilter * inputChannels +
159  cInput;
160  }
161  else
162  {
163  filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164  cInput * filterWidth * filterHeight +
165  yFilter * filterWidth +
166  xFilter;
167  }
168  }
169 
170  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
172 
173  float inputValue;
174 
175  // Check if we're in the padding.
176  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177  xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
178  {
179  inputValue = 0.0f;
180  }
181  else
182  {
183  unsigned int inputIndex = 0;
184 
185  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
186  // performance regression.
187  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
188  {
189  inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190  (yInput - paddingTop) * inputWidth * inputChannels +
191  (xInput - paddingLeft) * inputChannels +
192  cInput;
193  }
194  else
195  {
196  inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197  inputWidth * inputHeight * cInput +
198  inputWidth * (yInput - paddingTop) +
199  xInput - paddingLeft;
200  }
201  inputValue = inputVec[inputIndex];
202  }
203 
204  sum += filterVec[filterIndex] * inputValue;
205  }
206  }
207  }
208 
209  if (biasEnabled)
210  {
211  sum += biasVec[cOutput];
212  }
213 
214  unsigned int outIdx;
215  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
216  {
217  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218  yOutput * outputWidth * outputChannels +
219  xOutput * outputChannels +
220  cOutput;
221  }
222  else
223  {
224  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225  cOutput * outputHeight * outputWidth +
226  yOutput * outputWidth +
227  xOutput;
228  }
229 
230  rOutputEncoder[outIdx];
231  rOutputEncoder.Set(sum);
232  }
233  }
234  }
235  }
236 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NHWC, and Encoder< IType >::Set().

◆ Convolve3d()

void Convolve3d ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  paddingFront,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  zStride,
unsigned int  xDilation,
unsigned int  yDilation,
unsigned int  zDilation 
)

Definition at line 11 of file Conv3dImpl.cpp.

29 {
30  if (biasEnabled && !pBiasDecoder)
31  {
32  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
33  }
34  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
35 
36  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
37  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
38  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
39  const unsigned int depthIndex = dataLayoutIndexed.GetDepthIndex();
40 
41  const unsigned int inChannels = rInputShape[channelsIndex];
42  const unsigned int outChannels = rOutputShape[channelsIndex];
43 
44  const unsigned int batchSize = rOutputShape[0];
45  const unsigned int outputHeight = rOutputShape[heightIndex];
46  const unsigned int outputWidth = rOutputShape[widthIndex];
47  const unsigned int outputDepth = rOutputShape[depthIndex];
48  const unsigned int inputHeight = rInputShape[heightIndex];
49  const unsigned int inputWidth = rInputShape[widthIndex];
50  const unsigned int inputDepth = rInputShape[depthIndex];
51 
52  // Conv3d weights layout: [D,H,W,I,O]
53  const unsigned int filterDepth = rFilterShape[0];
54  const unsigned int filterHeight = rFilterShape[1];
55  const unsigned int filterWidth = rFilterShape[2];
56 
57  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
58  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);
59 
60  const TensorShape biasShape{outChannels};
61  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
62 
63  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
64  {
65  for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)
66  {
67  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
68  {
69  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
70  {
71  for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)
72  {
73  // This loop goes over each output element.
74  float sum = 0.0f;
75 
76  // Loop over each input channel.
77  for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)
78  {
79  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
80  {
81  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
82  {
83  for (unsigned int cInput = 0; cInput < inChannels; cInput++)
84  {
85  // This loop goes over each input element for each output element.
86  unsigned int filterIndex = 0;
87 
88  // Conv3d weights layout: [D,H,W,I,O]
89  // Keep this implementation, as using DataLayoutIndexed::GetIndex
90  // causes large performance regression.
91  filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +
92  yFilter * filterWidth * inChannels * outChannels +
93  xFilter * inChannels * outChannels +
94  cInput * outChannels +
95  cOutput;
96 
97  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
98  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
99  unsigned int zInput = zOutput * zStride + zFilter * zDilation;
100 
101  float inputValue;
102 
103  // Check if we're in the padding.
104  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
105  xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||
106  zInput < paddingFront || zInput >= inputDepth + paddingFront)
107  {
108  inputValue = 0.0f;
109  }
110  else
111  {
112  unsigned int inputIndex = 0;
113 
114  // Keep this implementation, as using DataLayoutIndexed::GetIndex
115  // causes large performance regression.
116  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
117  {
118  inputIndex =
119  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
120  (zInput-paddingFront) * inputHeight * inputWidth * inChannels +
121  (yInput-paddingTop) * inputWidth * inChannels +
122  (xInput-paddingLeft) * inChannels +
123  cInput;
124  }
125  else
126  {
127  // NCDHW DataLayout
128  inputIndex =
129  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
130  inputDepth * inputHeight * inputWidth * cInput +
131  (zInput-paddingFront) * inputHeight * inputWidth +
132  (yInput-paddingTop) * inputWidth +
133  xInput-paddingLeft;
134  }
135 
136  inputValue = inputVec[inputIndex];
137  }
138 
139  sum += filterVec[filterIndex] * inputValue;
140  }
141  }
142  }
143  }
144 
145  if (biasEnabled)
146  {
147  sum += biasVec[cOutput];
148  }
149 
150  unsigned int outIdx;
151  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
152  {
153  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
154  zOutput * outputHeight * outputWidth * outChannels +
155  yOutput * outputWidth * outChannels +
156  xOutput * outChannels +
157  cOutput;
158  }
159  else
160  {
161  // NCDHW DataLayout
162  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
163  cOutput * outputDepth * outputHeight * outputWidth +
164  zOutput * outputHeight * outputWidth +
165  yOutput * outputWidth +
166  xOutput;
167  }
168 
169  rOutputEncoder[outIdx];
170  rOutputEncoder.Set(sum);
171  }
172  }
173  }
174  }
175  }
176 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NDHWC, and Encoder< IType >::Set().

◆ CoordinatesToIndex()

uint32_t armnn::CoordinatesToIndex ( TensorShape shape,
std::vector< uint32_t > &  coordinates 
)

Definition at line 32 of file Tile.cpp.

33 {
34  uint32_t index = 0;
35  uint32_t base = 1;
36  uint32_t rank = shape.GetNumDimensions();
37  for (uint32_t i = rank; i > 0; --i)
38  {
39  index = index + coordinates[i - 1] * base;
40  base = base * shape[i - 1];
41  }
42  return index;
43 }

References TensorShape::GetNumDimensions().

Referenced by Tile().
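
The loop performs a standard row-major flattening, with the innermost coordinate varying fastest. A small worked sketch (the declaring header in the reference backend's Tile workload is assumed):

#include <armnn/Tensor.hpp>
#include <vector>

uint32_t ExampleFlattenCoordinates()
{
    // For shape [2, 3, 4] and coordinates {1, 2, 3}: 1*(3*4) + 2*4 + 3 = 23.
    armnn::TensorShape shape({ 2, 3, 4 });
    std::vector<uint32_t> coordinates{ 1, 2, 3 };
    return armnn::CoordinatesToIndex(shape, coordinates);  // 23
}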

◆ CopyArmComputeClTensorData()

void CopyArmComputeClTensorData ( arm_compute::CLTensor &  dstTensor,
const T *  srcData 
)

Definition at line 64 of file ClWorkloadUtils.hpp.

65 {
66  {
67  ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
68  dstTensor.map(true);
69  }
70 
71  {
72  ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
73  armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
74  }
75 
76  dstTensor.unmap();
77 }

References ARMNN_SCOPED_PROFILING_EVENT_CL.

Referenced by ClConstantWorkload::Execute(), and GpuFsaConstantWorkload::Execute().

◆ CopyArmComputeTensorData()

void armnn::CopyArmComputeTensorData ( arm_compute::Tensor &  dstTensor,
const T *  srcData 
)

Definition at line 62 of file NeonWorkloadUtils.hpp.

63 {
64  InitialiseArmComputeTensorEmpty(dstTensor);
65  CopyArmComputeITensorData(srcData, dstTensor);
66 }

Referenced by InitializeArmComputeTensorData().

◆ CopyTensorContentsGeneric()

void armnn::CopyTensorContentsGeneric ( const ITensorHandle srcTensor,
ITensorHandle dstTensor,
CopyFunc  copy 
)

Definition at line 46 of file WorkloadUtils.hpp.

47 {
48  // For ease of understanding, names are assigned to the dimensions
49  // of the tensor as if NHWC; however, this routine works with any 5D tensor
50  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52  TensorShape srcStrides = srcTensor->GetStrides();
53  const TensorShape& srcShape = srcTensor->GetShape();
54  const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
55  TensorShape dstStrides = dstTensor->GetStrides();
56  const TensorShape& dstShape = dstTensor->GetShape();
57  const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
58 
59  size_t srcDepth = 1;
60  size_t srcBatches = 1;
61  size_t srcHeight = 1;
62  size_t srcWidth = 1;
63  size_t srcChannels = 1;
64  AssignValues(srcShape.GetNumDimensions(),
65  0,
66  srcShape,
67  srcChannels,
68  srcWidth,
69  srcHeight,
70  srcBatches,
71  srcDepth);
72 
73  size_t srcDepthStride = 0;
74  size_t srcBatchStride = 0;
75  size_t srcHeightStride = 0;
76  size_t srcWidthStride = 0;
77  size_t srcChannelStride = 0;
78  AssignValues(srcStrides.GetNumDimensions(),
79  0,
80  srcStrides,
81  srcChannelStride,
82  srcWidthStride,
83  srcHeightStride,
84  srcBatchStride,
85  srcDepthStride);
86 
87  size_t dstDepth = 1;
88  size_t dstBatches = 1;
89  size_t dstHeight = 1;
90  size_t dstWidth = 1;
91  size_t dstChannels = 1;
92  AssignValues(dstShape.GetNumDimensions(),
93  0,
94  dstShape,
95  dstChannels,
96  dstWidth,
97  dstHeight,
98  dstBatches,
99  dstDepth);
100 
101  size_t dstDepthStride = 0;
102  size_t dstBatchStride = 0;
103  size_t dstHeightStride = 0;
104  size_t dstWidthStride = 0;
105  size_t dstChannelStride = 0;
106  AssignValues(dstStrides.GetNumDimensions(),
107  0,
108  dstStrides,
109  dstChannelStride,
110  dstWidthStride,
111  dstHeightStride,
112  dstBatchStride,
113  dstDepthStride);
114 
115  const unsigned char* srcDataStart;
116  unsigned char* dstDataStart;
117  {
118  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
119  srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
120  dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
121  }
122  if (srcDataStart == nullptr)
123  {
124  throw MemoryValidationException("The source tensor is null.");
125  }
126  if (dstDataStart == nullptr)
127  {
128  throw MemoryValidationException("The destination tensor is null.");
129  }
130 
131  size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
132  size_t copyWidth = std::min(srcWidth, dstWidth);
133  size_t copyHeight = std::min(srcHeight, dstHeight);
134  size_t copyBatches = std::min(srcBatches, dstBatches);
135  size_t copyDepth = std::min(srcDepth, dstDepth);
136 
137  // Coalesce inner dimensions where possible
138  // to reduce the overhead of calling copy() and to
139  // allow for memory bandwidth optimisations
140  if (copyLength == srcWidthStride &&
141  copyLength == dstWidthStride)
142  {
143  // There is no special padding between rows,
144  // and sizes are compatible, so copy whole rows
145  copyLength *= copyWidth;
146  copyWidth = 1;
147 
148  if (copyLength == srcHeightStride &&
149  copyLength == dstHeightStride)
150  {
151  // There is no special padding between batches
152  // and sizes are compatible so copy whole batches
153  copyLength *= copyHeight;
154  copyHeight = 1;
155  }
156  }
157 
158  const unsigned char* srcData = srcDataStart;
159  unsigned char* dstData = dstDataStart;
160  for (unsigned int d = 0; d < copyDepth; ++d)
161  {
162  auto srcPtrDepth = srcData;
163  auto dstPtrDepth = dstData;
164  for (unsigned int b = 0; b < copyBatches; ++b)
165  {
166  auto srcPtrBatch = srcData;
167  auto dstPtrBatch = dstData;
168  for (unsigned int h = 0; h < copyHeight; ++h)
169  {
170  auto srcPtrChannel = srcData;
171  auto dstPtrChannel = dstData;
172  for (unsigned int w = 0; w < copyWidth; ++w)
173  {
174  // Sanity check the memory area we've been asked to copy from and to.
175  if (copyLength > srcSize)
176  {
177  throw MemoryValidationException(
178  "The source tensor size does not match the size of the allocated tensor.");
179  }
180  if (copyLength > dstSize)
181  {
182  throw MemoryValidationException(
183  "The destination tensor size will overrun the destination tensor.");
184  }
185  copy(dstData, srcData, copyLength);
186  dstData += dstWidthStride;
187  srcData += srcWidthStride;
188  }
189  dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
190  srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
191  }
192  dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
193  srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
194  }
195  dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
196  srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
197  }
198 
199  srcTensor->Unmap();
200  dstTensor->Unmap();
201 }

References ARMNN_SCOPED_PROFILING_EVENT, TensorShape::GetNumDimensions(), ITensorHandle::GetShape(), ITensorHandle::GetStrides(), ITensorHandle::Map(), MaxNumOfTensorDimensions, Undefined, and ITensorHandle::Unmap().

Referenced by CopyToOutputTensor(), CopyMemGenericWorkload::Execute(), NeonConvertFp32ToFp16Workload::Execute(), NeonConvertFp16ToFp32Workload::Execute(), and CopyMemGenericWorkload::ExecuteAsync().

◆ CopyToOutputTensor()

void armnn::CopyToOutputTensor ( const Tensor outputTensor,
ITensorHandle outputTensorHandle 
)

Definition at line 1388 of file LoadedNetwork.cpp.

1389 {
1390  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
1391  auto copyFunc = [](void* dst, const void* src, size_t size)
1392  {
1393  memcpy(dst, src, size);
1394  };
1395 
1396  std::unique_ptr<ITensorHandle> tensorHandle =
1397  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1398  outputTensor.GetMemoryArea());
1399 
1400  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1401 }

References ARMNN_SCOPED_PROFILING_EVENT, CopyTensorContentsGeneric(), BaseTensor< MemoryType >::GetInfo(), BaseTensor< MemoryType >::GetMemoryArea(), and Undefined.

Referenced by LoadedNetwork::Execute().

◆ CreateAclNormalizationLayerInfoForL2Normalization()

arm_compute::NormalizationLayerInfo armnn::CreateAclNormalizationLayerInfoForL2Normalization ( const armnn::TensorInfo tensorInfo,
armnn::DataLayout  dataLayout 
)
inline

Definition at line 29 of file ArmComputeUtils.hpp.

31 {
32  unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
33  const unsigned int depth = tensorInfo.GetShape()[depthDimension];
34 
35  // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
36  // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
37  // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully choosing the normalization
38  // parameters.
39  //
40  // Please refer to both the reference implementation of the normalization layer and the implementation of
41  // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
42 
43  // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
44  // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
45  // ACL's normalization_layer_cross_map() CL function.
46  const uint32_t normSize = depth * 2u + 1u;
47 
48  // See ACL's NormalizationLayerInfo::scale_coeff() definition.
49  // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
50  const float alpha = 1.0f;
51 
52  // Don't offset the reduction.
53  const float kappa = 0.0f;
54 
55  // pow(reduction, -0.5) = 1 / sqrt(reduction)
56  const float beta = 0.5f;
57 
58  return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
59 }

References TensorInfo::GetShape(), and NCHW.

◆ CreateClContext()

flatbuffers::Offset<ClContext> armnn::CreateClContext ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>>  programs = 0 
)
inline

Definition at line 57 of file ClContextSchema_generated.h.

59  {
60  ClContextBuilder builder_(_fbb);
61  builder_.add_programs(programs);
62  return builder_.Finish();
63 }

References ClContextBuilder::add_programs(), and ClContextBuilder::Finish().

Referenced by CreateClContextDirect(), and ClContextSerializer::Serialize().

◆ CreateClContextDirect()

flatbuffers::Offset<ClContext> armnn::CreateClContextDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const std::vector< flatbuffers::Offset< armnn::Program >> *  programs = nullptr 
)
inline

Definition at line 65 of file ClContextSchema_generated.h.

67  {
68  auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<armnn::Program>>(*programs) : 0;
69  return armnn::CreateClContext(
70  _fbb,
71  programs__);
72 }

References CreateClContext().

◆ CreateDescriptorForConcatenation()

OriginsDescriptor armnn::CreateDescriptorForConcatenation ( TensorShapeIt  first,
TensorShapeIt  last,
unsigned int  concatenationDimension 
)

Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.

Definition at line 300 of file Descriptors.hpp.

303 {
304  auto numInputs = std::distance(first, last);
305 
306  if (numInputs < 2)
307  {
308  throw InvalidArgumentException("Concatenation requires at least 2 inputs");
309  }
310 
311  const auto& firstInputShape = *first;
312 
313  const unsigned int numDimensions = firstInputShape.GetNumDimensions();
314  for (auto it = first + 1; it != last; ++it)
315  {
316  if (it->GetNumDimensions() != numDimensions)
317  {
318  throw InvalidArgumentException("All inputs to concatenation must have the same number of dimensions");
319  }
320  }
321 
322  if (concatenationDimension >= numDimensions)
323  {
324  throw InvalidArgumentException("concatenationDimension must be between 0 and the number of dimensions.");
325  }
326 
327  for (auto it = first; it != last; ++it)
328  {
329  for (unsigned int d = 0; d < numDimensions; ++d)
330  {
331  const bool dimSizeOk = (d == concatenationDimension) || (firstInputShape[d] == (*it)[d]);
332  if (!dimSizeOk)
333  {
334  throw InvalidArgumentException("All inputs to concatenation must be the same size along all dimensions "
335  " except the concatenation dimension");
336  }
337  }
338  }
339 
340  OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
341  viewsDescriptor.SetConcatAxis(concatenationDimension);
342 
343  uint32_t viewIndex = 0u;
344  uint32_t coordAlongConcatDim = 0u;
345  for (auto it = first; it != last; ++it)
346  {
347  const auto& inputShape = *it;
348 
349  for (unsigned int i = 0; i < concatenationDimension; ++i)
350  {
351  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
352  }
353 
354  viewsDescriptor.SetViewOriginCoord(viewIndex, concatenationDimension, coordAlongConcatDim);
355  unsigned int dimSize = inputShape[concatenationDimension];
356  coordAlongConcatDim += dimSize;
357 
358 
359  for (unsigned int i = concatenationDimension + 1; i < numDimensions; ++i)
360  {
361  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
362  }
363 
364  ++viewIndex;
365  }
366 
367  return viewsDescriptor;
368 }

References OriginsDescriptor::SetConcatAxis(), and OriginsDescriptor::SetViewOriginCoord().
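
A usage sketch concatenating two 3-D shapes along dimension 2; the second view's origin along that dimension is 3, i.e. where the first view ends:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <vector>

armnn::OriginsDescriptor ExampleConcatDescriptor()
{
    // [1,2,3] and [1,2,5] concatenated along dimension 2 give a [1,2,8] output.
    std::vector<armnn::TensorShape> shapes{ armnn::TensorShape({ 1, 2, 3 }),
                                            armnn::TensorShape({ 1, 2, 5 }) };

    return armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), 2);
}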

◆ CreateInputsFrom()

SubgraphView::InputSlots armnn::CreateInputsFrom ( Layer layer)
inline

Definition at line 42 of file GpuFsaBackend.cpp.

43 {
44  SubgraphView::InputSlots result;
45  for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
46  {
47  result.push_back(&(*it));
48  }
49  return result;
50 }

References Layer::BeginInputSlots(), and Layer::EndInputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateOutputsFrom()

SubgraphView::OutputSlots armnn::CreateOutputsFrom ( Layer layer)
inline

Definition at line 52 of file GpuFsaBackend.cpp.

53 {
54  SubgraphView::OutputSlots result;
55  for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
56  {
57  result.push_back(&(*it));
58  }
59  return result;
60 }

References Layer::BeginOutputSlots(), and Layer::EndOutputSlots().

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ CreateProgram()

flatbuffers::Offset<Program> armnn::CreateProgram ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::String >  name = 0,
flatbuffers::Offset< flatbuffers::Vector< uint8_t >>  binary = 0 
)
inline

Definition at line 118 of file ClContextSchema_generated.h.

121  {
122  ProgramBuilder builder_(_fbb);
123  builder_.add_binary(binary);
124  builder_.add_name(name);
125  return builder_.Finish();
126 }

References ProgramBuilder::add_binary(), ProgramBuilder::add_name(), and ProgramBuilder::Finish().

Referenced by CreateProgramDirect(), and ClContextSerializer::Serialize().

◆ CreateProgramDirect()

flatbuffers::Offset<Program> armnn::CreateProgramDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const char *  name = nullptr,
const std::vector< uint8_t > *  binary = nullptr 
)
inline

Definition at line 128 of file ClContextSchema_generated.h.

131  {
132  auto name__ = name ? _fbb.CreateString(name) : 0;
133  auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
134  return armnn::CreateProgram(
135  _fbb,
136  name__,
137  binary__);
138 }

References CreateProgram().

◆ CreateSubgraphViewFrom()

SubgraphView::SubgraphViewPtr armnn::CreateSubgraphViewFrom ( SubgraphView::InputSlots &&  inputs,
SubgraphView::OutputSlots &&  outputs,
SubgraphView::Layers &&  layers 
)
inline

Definition at line 62 of file GpuFsaBackend.cpp.

65 {
66  return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
67 }

Referenced by GpuFsaBackend::OptimizeSubgraphView().
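
CreateInputsFrom, CreateOutputsFrom and CreateSubgraphViewFrom are local to GpuFsaBackend.cpp, so the sketch below only illustrates how they compose inside OptimizeSubgraphView; it is not a public API, and the Layer/SubgraphView headers it relies on are internal (assumed to be on the include path):

// Wraps a single layer in its own SubgraphView, as GpuFsaBackend::OptimizeSubgraphView
// does when it substitutes individual layers (illustrative sketch only).
armnn::SubgraphView::SubgraphViewPtr WrapLayer(armnn::Layer* layer)
{
    return armnn::CreateSubgraphViewFrom(armnn::CreateInputsFrom(layer),
                                         armnn::CreateOutputsFrom(layer),
                                         { layer });
}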

◆ CreateSupportedBackends()

BackendsMap CreateSupportedBackends ( TensorHandleFactoryRegistry handleFactoryRegistry,
BackendSettings backendSettings 
)

Definition at line 1309 of file Network.cpp.

1311 {
1312  BackendsMap backends;
1313  auto const& backendRegistry = BackendRegistryInstance();
1314  for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
1315  {
1316  auto backendFactory = backendRegistry.GetFactory(selectedBackend);
1317  auto backendObjPtr = backendFactory();
1318  ARMNN_ASSERT(backendObjPtr);
1319 
1320  backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
1321 
1322  backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
1323  }
1324 
1325  return backends;
1326 }

References ARMNN_ASSERT, BackendRegistryInstance(), and BackendSettings::m_SupportedBackends.

Referenced by Optimize().

◆ Debug()

void Debug ( const TensorInfo inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

Definition at line 97 of file Debug.cpp.

103 {
104  if (outputsToFile)
105  {
106 #if !defined(ARMNN_DISABLE_FILESYSTEM)
107  fs::path tmpDir = fs::temp_directory_path();
108  std::ofstream out(tmpDir.generic_string() + "/ArmNNIntermediateLayerOutputs/" + layerName + ".numpy");
109  PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, out);
110  out.close();
111 #endif
112  }
113  else
114  {
115  PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, std::cout);
116  }
117 }

◆ Debug< BFloat16 >()

template void armnn::Debug< BFloat16 > ( const TensorInfo inputInfo,
const BFloat16 inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< float >()

template void armnn::Debug< float > ( const TensorInfo inputInfo,
const float *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< Half >()

template void armnn::Debug< Half > ( const TensorInfo inputInfo,
const Half inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int16_t >()

template void armnn::Debug< int16_t > ( const TensorInfo inputInfo,
const int16_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int32_t >()

template void armnn::Debug< int32_t > ( const TensorInfo inputInfo,
const int32_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int64_t >()

template void armnn::Debug< int64_t > ( const TensorInfo inputInfo,
const int64_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< int8_t >()

template void armnn::Debug< int8_t > ( const TensorInfo inputInfo,
const int8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ Debug< uint8_t >()

template void armnn::Debug< uint8_t > ( const TensorInfo inputInfo,
const uint8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
bool  outputsToFile 
)

◆ DeleteAsType()

void DeleteAsType ( const void *const  blob)
inline

Definition at line 37 of file GpuFsaBackend.cpp.

38 {
39  delete static_cast<const T*>(blob);
40 }

◆ DepthToSpace()

void DepthToSpace ( const TensorInfo inputInfo,
const DepthToSpaceDescriptor descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 18 of file DepthToSpace.cpp.

23 {
24  const unsigned int blockSize = descriptor.m_BlockSize;
25  ARMNN_ASSERT(blockSize != 0u);
26 
27  const TensorShape& inputShape = inputInfo.GetShape();
28  const unsigned int batches = inputShape[0];
29 
30  armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
31  const unsigned int inDepth = inputShape[dataLayoutIndexed.GetChannelsIndex()];
32  const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
33  const unsigned int inWidth = inputShape[dataLayoutIndexed.GetWidthIndex()];
34 
35  const unsigned int outDepth = inDepth / (blockSize * blockSize);
36 
37  // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
38  //
39  // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
40  // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
41  //
42  // DepthToSpace can then be implemented as a permutation in 6D resulting in
43  // the following shapes:
44  //
45  // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
46  // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
47  //
48  // NOTE:
49  // Since 6D tensors are not currently supported, in practice we need to handle each
50  // batch separately and execute 5D permutations
51 
52  TensorShape permDestShape;
53  PermutationVector permVector{};
54  if (descriptor.m_DataLayout == DataLayout::NCHW)
55  {
56  permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
57  permVector = { 2, 4, 0, 1, 3 };
58  }
59  else
60  {
61  permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
62  permVector = { 0, 2, 1, 3, 4 };
63  }
64 
65  const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
66 
67  for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
68  {
69  const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
70 
71  armnnUtils::Permute(permDestShape,
72  permVector,
73  static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
74  static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
75  dataTypeSize);
76  }
77 }

References ARMNN_ASSERT, DepthToSpace(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumElements(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, and armnnUtils::Permute().

Referenced by DepthToSpace().
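
A minimal usage sketch, for illustration only (it is not part of the ArmNN sources and assumes the reference backend's DepthToSpace.hpp together with armnn/Tensor.hpp and armnn/Descriptors.hpp are on the include path). A 1x1x1x4 NHWC float tensor with block size 2 rearranges its four channels into a 2x2 spatial block:

    // Hypothetical sketch: 1x1x1x4 NHWC float input, block size 2.
    armnn::TensorInfo inputInfo({ 1, 1, 1, 4 }, armnn::DataType::Float32);
    armnn::DepthToSpaceDescriptor descriptor;
    descriptor.m_BlockSize  = 2;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    std::vector<float> input  = { 1.f, 2.f, 3.f, 4.f };
    std::vector<float> output(4); // expected output shape: 1x2x2x1

    armnn::DepthToSpace(inputInfo, descriptor, input.data(), output.data(), sizeof(float));
    // output is expected to hold { 1.f, 2.f, 3.f, 4.f } arranged as a 2x2 spatial block.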

◆ Dequantize() [1/4]

void armnn::Dequantize ( const T *  inputData,
float *  outputData,
const TensorInfo &  info 
)
inline

Definition at line 113 of file RefWorkloadUtils.hpp.

114 {
115  for (unsigned int i = 0; i < info.GetNumElements(); i++)
116  {
117  outputData[i] = Dequantize<T>(inputData[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
118  }
119 }

References info.

◆ Dequantize() [2/4]

std::vector<float> armnn::Dequantize ( const T *  quant,
const TensorInfo &  info 
)

Helper overload for quantized (e.g. u8) buffers: returns the dequantized values as a std::vector<float>.

Definition at line 102 of file RefWorkloadUtils.hpp.

103 {
104  std::vector<float> ret(info.GetNumElements());
105  for (size_t i = 0; i < info.GetNumElements(); i++)
106  {
107  ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
108  }
109  return ret;
110 }

References Dequantize(), and info.

◆ Dequantize() [3/4]

void Dequantize ( Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo 
)

Definition at line 13 of file Dequantize.cpp.

17 {
18  IgnoreUnused(outputInfo);
19  ARMNN_ASSERT(inputInfo.GetNumElements() == outputInfo.GetNumElements());
20  for (unsigned int i = 0; i < inputInfo.GetNumElements(); i++)
21  {
22  // inputDecoder.Get() dequantizes the data element from whatever
23  // type is given by inputInfo to fp32 (If MakeDecoder supports that dequantization)
24  // outputEncoder.Set() transforms the data element to whatever type is
25  // given by outputInfo (if MakeEncoder supports that transformation)
26  outputEncoder.Set(inputDecoder.Get());
27  ++outputEncoder;
28  ++inputDecoder;
29  }
30 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumElements(), IgnoreUnused(), and Encoder< IType >::Set().

◆ Dequantize() [4/4]

float Dequantize ( QuantizedType  value,
float  scale,
int32_t  offset 
)

Dequantize an 8-bit data type into a floating point data type.

Parameters
value - The value to dequantize.
scale - The scale (must be non-zero).
offset - The offset.
Returns
- The dequantized value calculated as (value-offset)*scale.

Definition at line 52 of file TypesUtils.cpp.

53 {
54  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
55  if (scale == 0.f)
56  {
57  throw armnn::InvalidArgumentException("Dequantize: Scale cannot be 0.f");
58  }
59  if (std::isnan(value))
60  {
61  throw armnn::InvalidArgumentException("Dequantize: Value is NaN");
62  }
63  return (armnn::numeric_cast<float>(value - offset)) * scale;
64 }

Referenced by QASymm8Decoder::DecodeTensor(), QASymmS8Decoder::DecodeTensor(), QSymmS8Decoder::DecodeTensor(), QSymm16Decoder::DecodeTensor(), QSymm8PerAxisDecoder::DecodeTensor(), ScaledInt32PerAxisDecoder::DecodeTensor(), SelectiveQuantizer< T, DoQuantize >::Dequantize(), Dequantize(), QASymm8Decoder::Get(), QASymmS8Decoder::Get(), QSymmS8Decoder::Get(), QSymm16Decoder::Get(), QASymm8Encoder::Get(), QASymmS8Encoder::Get(), QSymmS8Encoder::Get(), QSymm16Encoder::Get(), QSymm8PerAxisDecoder::Get(), QSymm8PerAxisEncoder::Get(), ScaledInt32PerAxisDecoder::Get(), and QSymm16PerAxisEncoder::Get().
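
A small worked example (values chosen purely for illustration): with scale 0.5 and offset 10, the quantized value 14 maps back to (14 - 10) * 0.5 = 2.0.

    // Illustrative only; relies on the uint8_t instantiation of armnn::Dequantize.
    float real = armnn::Dequantize(static_cast<uint8_t>(14), 0.5f, 10);
    // real == (14 - 10) * 0.5f == 2.0f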

◆ DetectionPostProcess()

void DetectionPostProcess ( const TensorInfo &  boxEncodingsInfo,
const TensorInfo &  scoresInfo,
const TensorInfo &  anchorsInfo,
const TensorInfo &  detectionBoxesInfo,
const TensorInfo &  detectionClassesInfo,
const TensorInfo &  detectionScoresInfo,
const TensorInfo &  numDetectionsInfo,
const DetectionPostProcessDescriptor &  desc,
Decoder< float > &  boxEncodings,
Decoder< float > &  scores,
Decoder< float > &  anchors,
float *  detectionBoxes,
float *  detectionClasses,
float *  detectionScores,
float *  numDetections 
)

Definition at line 141 of file DetectionPostProcess.cpp.

156 {
157  IgnoreUnused(anchorsInfo, detectionClassesInfo, detectionScoresInfo, numDetectionsInfo);
158 
159  // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
160  // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
161  std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
162 
163  const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
164  const unsigned int numScores = scoresInfo.GetNumElements();
165 
166  for (unsigned int i = 0; i < numBoxes; ++i)
167  {
168  // Y
169  float boxEncodingY = boxEncodings.Get();
170  float anchorY = anchors.Get();
171 
172  ++boxEncodings;
173  ++anchors;
174 
175  // X
176  float boxEncodingX = boxEncodings.Get();
177  float anchorX = anchors.Get();
178 
179  ++boxEncodings;
180  ++anchors;
181 
182  // H
183  float boxEncodingH = boxEncodings.Get();
184  float anchorH = anchors.Get();
185 
186  ++boxEncodings;
187  ++anchors;
188 
189  // W
190  float boxEncodingW = boxEncodings.Get();
191  float anchorW = anchors.Get();
192 
193  ++boxEncodings;
194  ++anchors;
195 
196  float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
197  float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
198 
199  float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
200  float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
201 
202  unsigned int indexY = i * 4;
203  unsigned int indexX = indexY + 1;
204  unsigned int indexH = indexX + 1;
205  unsigned int indexW = indexH + 1;
206 
207  // ymin
208  boxCorners[indexY] = yCentre - halfH;
209  // xmin
210  boxCorners[indexX] = xCentre - halfW;
211  // ymax
212  boxCorners[indexH] = yCentre + halfH;
213  // xmax
214  boxCorners[indexW] = xCentre + halfW;
215 
216  ARMNN_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
217  ARMNN_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
218  }
219 
220  unsigned int numClassesWithBg = desc.m_NumClasses + 1;
221 
222  // Decode scores
223  std::vector<float> decodedScores;
224  decodedScores.reserve(numScores);
225 
226  for (unsigned int i = 0u; i < numScores; ++i)
227  {
228  decodedScores.emplace_back(scores.Get());
229  ++scores;
230  }
231 
232  // Perform Non Max Suppression.
233  if (desc.m_UseRegularNms)
234  {
235  // Perform Regular NMS.
236  // For each class, perform NMS and select max detection numbers of the highest score across all classes.
237  std::vector<float> classScores(numBoxes);
238 
239  std::vector<unsigned int> selectedBoxesAfterNms;
240  selectedBoxesAfterNms.reserve(numBoxes);
241 
242  std::vector<float> selectedScoresAfterNms;
243  selectedBoxesAfterNms.reserve(numScores);
244 
245  std::vector<unsigned int> selectedClasses;
246 
247  for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
248  {
249  // For each boxes, get scores of the boxes for the class c.
250  for (unsigned int i = 0; i < numBoxes; ++i)
251  {
252  classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
253  }
254  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
255  boxCorners,
256  classScores,
257  desc.m_NmsScoreThreshold,
258  desc.m_DetectionsPerClass,
259  desc.m_NmsIouThreshold);
260 
261  for (unsigned int i = 0; i < selectedIndices.size(); ++i)
262  {
263  selectedBoxesAfterNms.push_back(selectedIndices[i]);
264  selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
265  selectedClasses.push_back(c);
266  }
267  }
268 
269  // Select max detection numbers of the highest score across all classes
270  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
271  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
272 
273  // Sort the max scores among the selected indices.
274  std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
275  TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
276 
277  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
278  selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
279  detectionBoxes, detectionScores, detectionClasses, numDetections);
280  }
281  else
282  {
283  // Perform Fast NMS.
284  // Select max scores of boxes and perform NMS on max scores,
285  // select max detection numbers of the highest score
286  unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
287  std::vector<float> maxScores;
288  std::vector<unsigned int>boxIndices;
289  std::vector<unsigned int>maxScoreClasses;
290 
291  for (unsigned int box = 0; box < numBoxes; ++box)
292  {
293  unsigned int scoreIndex = box * numClassesWithBg + 1;
294 
295  // Get the max scores of the box.
296  std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
297  TopKSort(numClassesPerBox, maxScoreIndices.data(),
298  decodedScores.data() + scoreIndex, desc.m_NumClasses);
299 
300  for (unsigned int i = 0; i < numClassesPerBox; ++i)
301  {
302  maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
303  maxScoreClasses.push_back(maxScoreIndices[i]);
304  boxIndices.push_back(box);
305  }
306  }
307 
308  // Perform NMS on max scores
309  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
310  desc.m_NmsScoreThreshold,
311  desc.m_MaxDetections,
312  desc.m_NmsIouThreshold);
313 
314  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
315  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
316 
317  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
318  boxIndices, maxScoreClasses, maxScores,
319  detectionBoxes, detectionScores, detectionClasses, numDetections);
320  }
321 }

References AllocateOutputData(), ARMNN_ASSERT, GenerateRangeK(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, DetectionPostProcessDescriptor::m_ScaleH, DetectionPostProcessDescriptor::m_ScaleW, DetectionPostProcessDescriptor::m_ScaleX, DetectionPostProcessDescriptor::m_ScaleY, DetectionPostProcessDescriptor::m_UseRegularNms, NonMaxSuppression(), and TopKSort().

◆ ExecuteFunction()

void armnn::ExecuteFunction ( std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs,
BinaryOperation  operation 
)

Definition at line 27 of file RefElementwiseBinaryWorkload.cpp.

30 {
31  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
32  const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
33  const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
34 
35  const TensorShape& inShape0 = inputInfo0.GetShape();
36  const TensorShape& inShape1 = inputInfo1.GetShape();
37  const TensorShape& outShape = outputInfo.GetShape();
38 
39  std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
40  std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
41  std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
42 
43  using AddFunction = ElementwiseBinaryFunction<std::plus<DataType>>;
44  using DivFunction = ElementwiseBinaryFunction<std::divides<DataType>>;
45  using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
46  using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
47  using MulFunction = ElementwiseBinaryFunction<std::multiplies<DataType>>;
48  using SubFunction = ElementwiseBinaryFunction<std::minus<DataType>>;
49  using SqDiffFunction = ElementwiseBinaryFunction<armnn::squaredDifference<DataType>>;
50  using PowerFunction = ElementwiseBinaryFunction<armnn::power<DataType>>;
51 
52  switch (operation)
53  {
54  case BinaryOperation::Add:
55  {
56  AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
57  break;
58  }
59  case BinaryOperation::Div:
60  {
61  DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
62  break;
63  }
64  case BinaryOperation::Maximum:
65  {
66  MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
67  break;
68  }
69  case BinaryOperation::Minimum:
70  {
71  MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
72  break;
73  }
74  case BinaryOperation::Mul:
75  {
76  MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
77  break;
78  }
79  case BinaryOperation::Sub:
80  {
81  SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
82  break;
83  }
84  case BinaryOperation::SqDiff:
85  {
86  SqDiffFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
87  break;
88  }
89  case BinaryOperation::Power:
90  {
91  PowerFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
92  break;
93  }
94  default:
95  {
96  throw InvalidArgumentException(std::string("Unsupported binary operation ") +
97  GetBinaryOperationAsCString(operation), CHECK_LOCATION());
98  }
99  }
100 }

References Add, CHECK_LOCATION, Div, GetBinaryOperationAsCString(), TensorInfo::GetShape(), GetTensorInfo(), Map, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

◆ ExtractJsonObjects()

void armnn::ExtractJsonObjects ( unsigned int  inferenceIndex,
const Event *  parentEvent,
JsonChildObject &  parentObject,
std::map< const Event *, std::vector< const Event * >>  descendantsMap 
)

Definition at line 303 of file Profiling.cpp.

307 {
308  ARMNN_ASSERT(parentEvent);
309 
310  // If profiling GUID is entered, process it
311  if (parentEvent->GetProfilingGuid().has_value())
312  {
313  arm::pipe::ProfilingGuid profilingGuid;
314  profilingGuid = parentEvent->GetProfilingGuid().value();
315  parentObject.SetGuid(profilingGuid);
316  }
317  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
318  unsigned int childIdx = 0;
319  unsigned int numSkippedKernels = 0;
320  if (inferenceIndex > 0)
321  {
322  for (auto &i: parentEvent->GetInstruments())
323  {
324  if (i->HasKernelMeasurements())
325  {
326  numSkippedKernels = static_cast<unsigned int>(parentObject.m_Children.size() -
327  instrumentMeasurements.size());
328  childIdx = numSkippedKernels;
329  }
330  }
331  }
332 
333  for (size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
334  {
335  if (inferenceIndex == 0)
336  {
337  // Only add kernel measurement once, in case of multiple inferences
338  JsonChildObject measurementObject{ instrumentMeasurements[measurementIndex].m_Name };
339  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
340  measurementObject.SetType(JsonObjectType::Measurement);
341 
342  ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
343  parentObject.AddChild(measurementObject);
344  }
345  else
346  {
347  if (numSkippedKernels > 0)
348  {
349  parentObject.GetChild(--numSkippedKernels).AddMeasurement(0.0);
350  }
351  }
352 
353  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
354  }
355 
356  auto childEventsIt = descendantsMap.find(parentEvent);
357  if (childEventsIt != descendantsMap.end())
358  {
359  for (auto childEvent : childEventsIt->second)
360  {
361  if (inferenceIndex == 0)
362  {
363  // Only add second level once, in case of multiple inferences
364  JsonChildObject childObject{ childEvent->GetName() };
365  childObject.SetType(JsonObjectType::Event);
366  parentObject.AddChild(childObject);
367  }
368 
369  // It's possible that childIdx can overrun the parents' child vector. Check before we try to process a
370  // non-existent child.
371  if (childIdx < parentObject.NumChildren())
372  {
373  // Recursively process children.
374  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
375  childIdx++;
376  }
377  }
378  }
379 }

References JsonChildObject::AddChild(), JsonChildObject::AddMeasurement(), ARMNN_ASSERT, Event, JsonChildObject::GetChild(), Event::GetInstruments(), Event::GetMeasurements(), Event::GetProfilingGuid(), OptionalBase::has_value(), JsonChildObject::m_Children, Measurement, JsonChildObject::NumChildren(), JsonChildObject::SetGuid(), JsonChildObject::SetType(), JsonChildObject::SetUnit(), and OptionalReferenceSwitch< IsReference, T >::value().

Referenced by ProfilerImpl::Print().

◆ FakeQuantization()

void armnn::FakeQuantization ( const float *  inputData,
float *  outputData,
uint32_t  numElements,
float  min,
float  max 
)

Definition at line 17 of file RefFakeQuantizationFloat32Workload.cpp.

18 {
19  float scale = (max - min) / 255.f;
20  int32_t offset = armnn::numeric_cast<int32_t>((-min * 255.f) / (max - min));
21 
22  for (uint32_t i = 0; i < numElements; i++)
23  {
24  outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
25  }
26 
27 }
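
For a concrete feel for the derived parameters (illustration only): with min = -1.0f and max = 1.0f, scale = (max - min) / 255 = 2 / 255 ≈ 0.00784 and offset = (-min * 255) / (max - min) = 127.5, which the integer cast turns into 127. Each input is quantized to one of 256 uint8 levels and immediately dequantized again, so an input of 0.26f is expected to come back as roughly 0.259f.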

◆ FalseFunc()

bool armnn::FalseFunc ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 62 of file LayerSupportCommon.hpp.

63 {
64  IgnoreUnused(reasonIfUnsupported);
65  IgnoreUnused(params...);
66  return false;
67 }

References IgnoreUnused().

◆ FalseFuncF16()

bool armnn::FalseFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 70 of file LayerSupportCommon.hpp.

71 {
72  IgnoreUnused(params...);
73  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type");
74  return false;
75 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncF32()

bool armnn::FalseFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 78 of file LayerSupportCommon.hpp.

79 {
80  IgnoreUnused(params...);
81  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type");
82  return false;
83 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncI32()

bool armnn::FalseFuncI32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 94 of file LayerSupportCommon.hpp.

95 {
96  IgnoreUnused(params...);
97  SetValueChecked(reasonIfUnsupported, "Layer is not supported with int32 data type");
98  return false;
99 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseFuncU8()

bool armnn::FalseFuncU8 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 86 of file LayerSupportCommon.hpp.

87 {
88  IgnoreUnused(params...);
89  SetValueChecked(reasonIfUnsupported, "Layer is not supported with 8-bit data type");
90  return false;
91 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF16()

bool armnn::FalseInputFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 110 of file LayerSupportCommon.hpp.

111 {
112  IgnoreUnused(params...);
113  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type input");
114  return false;
115 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseInputFuncF32()

bool armnn::FalseInputFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 102 of file LayerSupportCommon.hpp.

103 {
104  IgnoreUnused(params...);
105  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type input");
106  return false;
107 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF16()

bool armnn::FalseOutputFuncF16 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 126 of file LayerSupportCommon.hpp.

127 {
128  IgnoreUnused(params...);
129  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type output");
130  return false;
131 }

References IgnoreUnused(), and SetValueChecked().

◆ FalseOutputFuncF32()

bool armnn::FalseOutputFuncF32 ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 118 of file LayerSupportCommon.hpp.

119 {
120  IgnoreUnused(params...);
121  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type output");
122  return false;
123 }

References IgnoreUnused(), and SetValueChecked().

◆ Fill()

void Fill ( Encoder< float > &  output,
const TensorShape &  desiredOutputShape,
const float  value 
)

Creates a tensor and fills it with a scalar value.

Definition at line 13 of file Fill.cpp.

16 {
17  for(unsigned int i = 0; i < desiredOutputShape.GetNumElements(); ++i)
18  {
19  output[i];
20  output.Set(value);
21  }
22 }

References TensorShape::GetNumElements(), and Encoder< IType >::Set().
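
A minimal usage sketch, for illustration only (it assumes the reference backend's MakeEncoder helper from Encoders.hpp is available to construct the Encoder):

    armnn::TensorInfo outputInfo({ 2, 3 }, armnn::DataType::Float32);
    std::vector<float> outputData(outputInfo.GetNumElements());

    auto encoder = armnn::MakeEncoder<float>(outputInfo, outputData.data());
    armnn::Fill(*encoder, outputInfo.GetShape(), 42.f);
    // outputData should now contain six copies of 42.f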

◆ FindKernelMeasurements()

std::vector<Measurement> armnn::FindKernelMeasurements ( const Event *  event)

Definition at line 62 of file Profiling.cpp.

63 {
64  ARMNN_ASSERT(event != nullptr);
65 
66  std::vector<Measurement> measurements;
67 
68  // Search through the measurements.
69  for (const auto& measurement : event->GetMeasurements())
70  {
71  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
72  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
73  {
74  // Measurement found.
75  measurements.push_back(measurement);
76  }
77  }
78 
79  return measurements;
80 }

References ARMNN_ASSERT, and Event::GetMeasurements().

◆ FindMeasurement()

Measurement armnn::FindMeasurement ( const std::string &  name,
const Event *  event 
)

Definition at line 43 of file Profiling.cpp.

44 {
45 
46  ARMNN_ASSERT(event != nullptr);
47 
48  // Search through the measurements.
49  for (const auto& measurement : event->GetMeasurements())
50  {
51  if (measurement.m_Name == name)
52  {
53  // Measurement found.
54  return measurement;
55  }
56  }
57 
58  // Measurement not found.
59  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
60 }

References ARMNN_ASSERT, and Event::GetMeasurements().

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults(), and ProfilerImpl::CalculateProfilingEventStats().

◆ FinishClContextBuffer()

void armnn::FinishClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 171 of file ClContextSchema_generated.h.

173  {
174  fbb.Finish(root, ClContextIdentifier());
175 }

References ClContextIdentifier().

◆ FinishSizePrefixedClContextBuffer()

void armnn::FinishSizePrefixedClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 177 of file ClContextSchema_generated.h.

179  {
180  fbb.FinishSizePrefixed(root, ClContextIdentifier());
181 }

References ClContextIdentifier().

◆ FoldPadIntoAveragePool2d()

LayerType* armnn::FoldPadIntoAveragePool2d ( OptimizationViews &  optimizationViews,
Pooling2dLayer *  baseLayer,
Pooling2dDescriptor &  poolDescriptor,
PadLayer *  padLayer 
)

Definition at line 341 of file SubgraphUtils.hpp.

345 {
346  IConnectableLayer* replacement =
347  optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d");
348  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
349 
350  FoldPadLayer(optimizationViews,
351  baseLayer,
352  replacementLayer,
353  padLayer);
354 
355  return replacementLayer;
356 }

References INetwork::AddPooling2dLayer(), FoldPadLayer(), and OptimizationViews::GetINetwork().

◆ FoldPadLayer()

LayerType* armnn::FoldPadLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
LayerType *  replacementLayer,
PadLayer *  padLayer 
)

Definition at line 234 of file SubgraphUtils.hpp.

238 {
239  SubgraphView substitutionSubgraph({padLayer, baseLayer},
240  CreateIInputsFrom({padLayer}),
241  CreateIOutputsFrom({baseLayer}));
242  SubgraphView replacementSubgraph(replacementLayer);
243 
244  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
245 
246  return replacementLayer;
247 }

References OptimizationViews::AddSubstitution().

Referenced by FoldPadIntoAveragePool2d().

◆ ForEachLayerInput()

void armnn::ForEachLayerInput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 267 of file SubgraphViewSelector.cpp.

270 {
271  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
272 
273  for (auto inputSlot : layer.GetInputSlots())
274  {
275  auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
276  ARMNN_ASSERT_MSG(connectedInput, "Dangling input slot detected.");
277  Layer& inputLayer = connectedInput->GetOwningLayer();
278 
279  auto parentInfo = layerInfos.find(&inputLayer);
280  if (parentInfo != layerInfos.end())
281  {
282  function(parentInfo->second);
283  }
284  }
285 }

References ARMNN_ASSERT_MSG, and Layer::GetInputSlots().

Referenced by AssignSplitId(), and IsReadyForSplitAssignment().

◆ ForEachLayerOutput()

void armnn::ForEachLayerOutput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 288 of file SubgraphViewSelector.cpp.

291 {
292  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
293 
294  for (auto& outputSlot : layer.GetOutputSlots())
295  {
296  for (auto& output : outputSlot.GetConnections())
297  {
298  Layer& childLayer = output->GetOwningLayer();
299 
300  auto childInfo = layerInfos.find(&childLayer);
301  if (childInfo != layerInfos.end())
302  {
303  function(childInfo->second);
304  }
305  }
306  }
307 }

References Layer::GetOutputSlots().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ FullyConnected()

void FullyConnected ( const TensorShape &  rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape &  rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape &  rWeightsShape,
Decoder< float > &  rWeightDecoder,
Decoder< float > *  pBiasDecoder,
const bool  biasEnabled,
const unsigned int  K,
const bool  transposeWeights 
)

Performs a matrix multiplication and optionally adds a bias.

Definition at line 15 of file FullyConnected.cpp.

25 {
26  // Perform FullyConnected implementation
27  unsigned int outputSize = rOutputShape[1];
28 
29  const std::vector<float> decodedInputs = rInputDecoder.DecodeTensor(rInputShape);
30  const std::vector<float> decodedWeights = rWeightDecoder.DecodeTensor(rWeightsShape);
31 
32  const TensorShape biasShape{outputSize};
33 
34  ARMNN_ASSERT(!biasEnabled || pBiasDecoder != nullptr);
35  const std::vector<float> decodedBiases = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
36 
37 
38  for (unsigned int n = 0; n < rInputShape[0]; n++)
39  {
40  for (unsigned int channelOutput = 0; channelOutput < outputSize; channelOutput++)
41  {
42  float outval = 0.f;
43 
44  for (unsigned int channelInput = 0; channelInput < K; channelInput++)
45  {
46  float weight;
47  if (transposeWeights)
48  {
49  weight = decodedWeights[channelOutput * K + channelInput];
50  }
51  else
52  {
53  weight = decodedWeights[channelInput * outputSize + channelOutput];
54  }
55 
56  outval += weight * decodedInputs[n * K + channelInput];
57  }
58 
59  if (biasEnabled)
60  {
61  outval += decodedBiases[channelOutput];
62  }
63 
64  rOutputEncoder[n * outputSize + channelOutput];
65  rOutputEncoder.Set(outval);
66  }
67  }
68 }

References ARMNN_ASSERT, Decoder< IType >::DecodeTensor(), and Encoder< IType >::Set().
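
For orientation (as implied by the indexing above rather than an authoritative statement of the API contract): the input decodes to shape [N, K], the weights to [outputSize, K] when transposeWeights is true (or [K, outputSize] otherwise), the optional bias to [outputSize], and the output is written with shape [N, outputSize] as output[n][o] = sum_k weight[o][k] * input[n][k], plus bias[o] when biasEnabled is set.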

◆ FuseAdditionLayer()

LayerType* armnn::FuseAdditionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 74 of file ArmComputeSubgraphUtils.hpp.

79 {
80  ARMNN_NO_DEPRECATE_WARN_BEGIN
81  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddAdditionLayer(name.c_str());
82  ARMNN_NO_DEPRECATE_WARN_END
83  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
84 
85  FuseLayer(optimizationViews,
86  baseLayer,
87  replacementLayer,
88  activationLayer,
89  activationDesc);
90 
91  return replacementLayer;
92 }

References INetwork::AddAdditionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseBatchNormalizationLayer()

LayerType* armnn::FuseBatchNormalizationLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 179 of file ArmComputeSubgraphUtils.hpp.

184 {
185  IConnectableLayer* replacement =
186  optimizationViews.GetINetwork()->AddBatchNormalizationLayer(baseLayer->GetParameters(),
187  ConstTensor(),
188  ConstTensor(),
189  ConstTensor(),
190  ConstTensor(),
191  name.c_str());
192  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
193 
194  FuseLayer(optimizationViews,
195  baseLayer,
196  replacementLayer,
197  activationLayer,
198  activationDesc);
199 
200  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
201  CreateIInputsFrom({baseLayer}),
202  CreateIOutputsFrom({activationLayer}));
203  SubgraphView replacementSubgraph(replacementLayer);
204 
205  return replacementLayer;
206 }

References INetwork::AddBatchNormalizationLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseConvolution2dLayer()

LayerType* armnn::FuseConvolution2dLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 209 of file ArmComputeSubgraphUtils.hpp.

214 {
215  IConnectableLayer* replacement = optimizationViews.GetINetwork()
216  ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
217 
218  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
219 
220 
221  FuseLayer(optimizationViews,
222  baseLayer,
223  replacementLayer,
224  activationLayer,
225  activationDesc);
226 
227  return replacementLayer;
228 }

References INetwork::AddConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDepthwiseConvolution2dLayer()

LayerType* armnn::FuseDepthwiseConvolution2dLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 231 of file ArmComputeSubgraphUtils.hpp.

236 {
237  IConnectableLayer* replacement =
238  optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
239 
240  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
241 
242 
243  FuseLayer(optimizationViews,
244  baseLayer,
245  replacementLayer,
246  activationLayer,
247  activationDesc);
248 
249  return replacementLayer;
250 }

References INetwork::AddDepthwiseConvolution2dLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseDivisionLayer()

LayerType* armnn::FuseDivisionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 116 of file ArmComputeSubgraphUtils.hpp.

121 {
122  ARMNN_NO_DEPRECATE_WARN_BEGIN
123  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddDivisionLayer(name.c_str());
124  ARMNN_NO_DEPRECATE_WARN_END
125  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
126 
127  FuseLayer(optimizationViews,
128  baseLayer,
129  replacementLayer,
130  activationLayer,
131  activationDesc);
132 
133  return replacementLayer;
134 }

References INetwork::AddDivisionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseElementwiseBinaryLayer()

LayerType* armnn::FuseElementwiseBinaryLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
BinaryOperation  operation,
std::string  name 
)

Definition at line 158 of file ArmComputeSubgraphUtils.hpp.

164 {
165  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddElementwiseBinaryLayer(operation,
166  name.c_str());
167  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
168 
169  FuseLayer(optimizationViews,
170  baseLayer,
171  replacementLayer,
172  activationLayer,
173  activationDesc);
174 
175  return replacementLayer;
176 }

References INetwork::AddElementwiseBinaryLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseFullyConnectedLayer()

LayerType* armnn::FuseFullyConnectedLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 253 of file ArmComputeSubgraphUtils.hpp.

258 {
259  IConnectableLayer* replacement =
260  optimizationViews.GetINetwork()->AddFullyConnectedLayer(baseLayer->GetParameters(),
261  name.c_str());
262  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
263 
264  FuseLayer(optimizationViews,
265  baseLayer,
266  replacementLayer,
267  activationLayer,
268  activationDesc);
269 
270 
271  return replacementLayer;
272 }

References INetwork::AddFullyConnectedLayer(), FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseLayer()

LayerType* armnn::FuseLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
LayerType *  replacementLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc 
)

Definition at line 54 of file ArmComputeSubgraphUtils.hpp.

59 {
60  replacementLayer->SetAdditionalInfoForObject(
61  std::make_shared<ActivationDescriptor>(activationDesc));
62 
63  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
64  CreateIInputsFrom({baseLayer}),
65  CreateIOutputsFrom({activationLayer}));
66  SubgraphView replacementSubgraph(replacementLayer);
67 
68  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
69 
70  return replacementLayer;
71 }

References OptimizationViews::AddSubstitution().

Referenced by FuseAdditionLayer(), FuseBatchNormalizationLayer(), FuseConvolution2dLayer(), FuseDepthwiseConvolution2dLayer(), FuseDivisionLayer(), FuseElementwiseBinaryLayer(), FuseFullyConnectedLayer(), FuseMultiplicationLayer(), and FuseSubtractionLayer().

◆ FuseMultiplicationLayer()

LayerType* armnn::FuseMultiplicationLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 137 of file ArmComputeSubgraphUtils.hpp.

142 {
143  ARMNN_NO_DEPRECATE_WARN_BEGIN
144  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddMultiplicationLayer(name.c_str());
145  ARMNN_NO_DEPRECATE_WARN_END
146  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
147 
148  FuseLayer(optimizationViews,
149  baseLayer,
150  replacementLayer,
151  activationLayer,
152  activationDesc);
153 
154  return replacementLayer;
155 }

References INetwork::AddMultiplicationLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ FuseSubtractionLayer()

LayerType* armnn::FuseSubtractionLayer ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ActivationLayer *  activationLayer,
ActivationDescriptor &  activationDesc,
std::string  name 
)

Definition at line 95 of file ArmComputeSubgraphUtils.hpp.

100 {
101  ARMNN_NO_DEPRECATE_WARN_BEGIN
102  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddSubtractionLayer(name.c_str());
103  ARMNN_NO_DEPRECATE_WARN_END
104  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
105 
106  FuseLayer(optimizationViews,
107  baseLayer,
108  replacementLayer,
109  activationLayer,
110  activationDesc);
111 
112  return replacementLayer;
113 }

References INetwork::AddSubtractionLayer(), ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, FuseLayer(), and OptimizationViews::GetINetwork().

◆ Gather()

void Gather ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  params,
const int32_t *  indices,
Encoder< float > &  output,
const int32_t  axis_int 
)

Definition at line 15 of file Gather.cpp.

22 {
23  IgnoreUnused(outputInfo);
24 
25  const int paramsRank = static_cast<int>(paramsInfo.GetNumDimensions());
26  if((axis_int < -1 * paramsRank) || (paramsRank <= axis_int))
27  {
28  throw InvalidArgumentException((fmt::format("Gather: Axis {} is not within [-{}, {}) range",
29  axis_int, paramsRank, paramsRank)));
30  }
31  const unsigned int axis = (axis_int < 0) ? static_cast<unsigned int>(paramsRank + axis_int)
32  : static_cast<unsigned int>(axis_int);
33 
34  const TensorShape& paramsShape = paramsInfo.GetShape();
35 
36  // Product of all dimensions to the left side of the axis
37  unsigned int paramsOuterProduct = 1;
38  for (unsigned int i = 0; i < axis; ++i)
39  {
40  paramsOuterProduct *= paramsShape[i];
41  }
42  // Product of all dimensions to the right side of the axis
43  unsigned int paramsInnerProduct = 1;
44  for (unsigned int k = 1 + axis; k < paramsInfo.GetNumDimensions(); ++k)
45  {
46  paramsInnerProduct *= paramsShape[k];
47  }
48 
49  unsigned int offset = 0;
50  unsigned int outIndex = 0;
51  for (unsigned int i = 0; i < paramsOuterProduct; ++i)
52  {
53  for (unsigned int j = 0; j < indicesInfo.GetNumElements(); ++j)
54  {
55  unsigned int index =
56  (indices[j] < 0) ? static_cast<unsigned int>(static_cast<int>(paramsShape[axis]) + indices[j])
57  : static_cast<unsigned int>(indices[j]);
58 
59  if (index >= paramsShape[axis])
60  {
61  throw InvalidArgumentException((fmt::format("Gather: index >= paramsShape[axis]: {} >= {}",
62  index, paramsShape[axis] )));
63  }
64 
65  unsigned int startOffset = (paramsInnerProduct * index) + offset;
66  unsigned int endOffset = startOffset + paramsInnerProduct;
67 
68  for (unsigned int k = startOffset; k < endOffset; ++k)
69  {
70  params[k];
71  float outputValue = params.Get();
72  output[outIndex];
73  output.Set(outputValue);
74  ++outIndex;
75  }
76  }
77  offset += paramsShape[axis] * paramsInnerProduct;
78  }
79 
80  if (outIndex != outputInfo.GetNumElements())
81  {
82  throw InvalidArgumentException((fmt::format("Gather: Invalid outIndex {} ", outIndex)));
83  }
84 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), and Encoder< IType >::Set().
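
A worked example (illustration only): for params of shape [3, 2] holding { {1, 2}, {3, 4}, {5, 6} }, indices = { 2, 0 } and axis = 0, the code computes paramsOuterProduct = 1 and paramsInnerProduct = 2, so each index copies one row of two elements; the output has shape [2, 2] and holds { {5, 6}, {1, 2} }.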

◆ GatherTensorHandlePairs()

void armnn::GatherTensorHandlePairs ( const DescriptorType &  descriptor,
std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType * >> &  tensorHandlePairs 
)

Definition at line 204 of file WorkloadUtils.hpp.

206 {
207  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
208  tensorHandlePairs.reserve(numInputs);
209 
210  for (unsigned int i = 0; i < numInputs; ++i)
211  {
212  SrcTensorHandleType* const srcTensorHandle =
213  PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
214  DstTensorHandleType* const dstTensorHandle =
215  PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
216 
217  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
218  }
219 }

Referenced by CopyMemGenericWorkload::CopyMemGenericWorkload(), CopyMemGenericWorkload::ExecuteAsync(), NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(), and NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload().

◆ GeneratePermutationVectorOnLastTwoDimensions()

armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions ( unsigned int  rank)

Generates a permutation vector of size rank that permutes the two rightmost dimensions.

Parameters
rank - Tensor rank, i.e. the number of dimensions in the tensors
Returns
- A permutation vector that swaps the last two dimensions

Definition at line 356 of file WorkloadUtils.cpp.

357 {
358  armnn::PermutationVector permutationVector{};
359  switch (rank)
360  {
361  case 2:
362  permutationVector = {1U, 0U};
363  break;
364  case 3:
365  permutationVector = {0U, 2U, 1U};
366  break;
367  case 4:
368  permutationVector = {0U, 1U, 3U, 2U};
369  break;
370  default:
371  throw Exception("Invalid number of dimensions.");
372  }
373  return permutationVector;
374 }
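
For example (illustration only):

    armnn::PermutationVector perm = armnn::GeneratePermutationVectorOnLastTwoDimensions(3);
    // perm is {0, 2, 1}: the last two dimensions swap places, so a [2, 3, 4]
    // tensor permuted with this vector becomes [2, 4, 3].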

◆ GenerateRangeK()

std::vector<unsigned int> armnn::GenerateRangeK ( unsigned int  k)

Definition at line 18 of file DetectionPostProcess.cpp.

19 {
20  std::vector<unsigned int> range(k);
21  std::iota(range.begin(), range.end(), 0);
22  return range;
23 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().

◆ GetActivationFunctionAsCString()

constexpr char const* armnn::GetActivationFunctionAsCString ( ActivationFunction  activation)
constexpr

Definition at line 31 of file TypesUtils.hpp.

32 {
33  switch (activation)
34  {
35  case ActivationFunction::Sigmoid: return "Sigmoid";
36  case ActivationFunction::TanH: return "TanH";
37  case ActivationFunction::Linear: return "Linear";
38  case ActivationFunction::ReLu: return "ReLu";
39  case ActivationFunction::BoundedReLu: return "BoundedReLu";
40  case ActivationFunction::SoftReLu: return "SoftReLu";
41  case ActivationFunction::LeakyReLu: return "LeakyReLu";
42  case ActivationFunction::Abs: return "Abs";
43  case ActivationFunction::Sqrt: return "Sqrt";
44  case ActivationFunction::Square: return "Square";
45  case ActivationFunction::Elu: return "Elu";
46  case ActivationFunction::HardSwish: return "HardSwish";
47  case ActivationFunction::Gelu: return "Gelu";
48  default: return "Unknown";
49  }
50 }

References Abs, BoundedReLu, Elu, Gelu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by StringifyLayerParameters< ActivationDescriptor >::Serialize().

◆ GetArgMinMaxFunctionAsCString()

constexpr char const* armnn::GetArgMinMaxFunctionAsCString ( ArgMinMaxFunction  function)
constexpr

Definition at line 52 of file TypesUtils.hpp.

53 {
54  switch (function)
55  {
56  case ArgMinMaxFunction::Max: return "Max";
57  case ArgMinMaxFunction::Min: return "Min";
58  default: return "Unknown";
59  }
60 }

References Max, and Min.

◆ GetBiasDataType()

DataType GetBiasDataType ( DataType  inputDataType)

Definition at line 28 of file WorkloadData.cpp.

29 {
30  switch (inputDataType)
31  {
32  case DataType::Float16:
33  return DataType::Float16;
34  case DataType::BFloat16:
35  case DataType::Float32:
36  return DataType::Float32;
37  case DataType::QAsymmS8:
38  case DataType::QAsymmU8:
39  case DataType::QSymmS8:
40  case DataType::QSymmS16:
41  return DataType::Signed32;
42  default:
43  ARMNN_ASSERT_MSG(false, "Invalid input data type");
44  return DataType::Float32;
45  }
46 }

References ARMNN_ASSERT_MSG, and BFloat16.

Referenced by FullyConnectedQueueDescriptor::Validate(), Convolution2dQueueDescriptor::Validate(), Convolution3dQueueDescriptor::Validate(), DepthwiseConvolution2dQueueDescriptor::Validate(), and TransposeConvolution2dQueueDescriptor::Validate().
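
For example (illustration only), quantized input types map to 32-bit signed biases while float inputs keep a float bias:

    armnn::DataType b1 = armnn::GetBiasDataType(armnn::DataType::QAsymmU8); // DataType::Signed32
    armnn::DataType b2 = armnn::GetBiasDataType(armnn::DataType::Float16);  // DataType::Float16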

◆ GetBiasTypeFromWeightsType()

armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType ( armnn::Optional< armnn::DataType weightsType)
inline

Definition at line 14 of file LayerSupportRules.hpp.

15 {
16  if (!weightsType)
17  {
18  return weightsType;
19  }
20 
21  switch(weightsType.value())
22  {
23  case armnn::DataType::Float16:
24  case armnn::DataType::Float32:
25  return weightsType;
26  case armnn::DataType::QAsymmS8:
27  case armnn::DataType::QAsymmU8:
28  case armnn::DataType::QSymmS8:
29  case armnn::DataType::QSymmS16:
30  return armnn::DataType::Signed32;
31  default:
32  ARMNN_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
33  }
34  return armnn::EmptyOptional();
35 }

References ARMNN_ASSERT_MSG, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by BiasAndWeightsTypesMatch::BiasAndWeightsTypesMatch().

◆ GetBinaryOperationAsCString()

constexpr char const* armnn::GetBinaryOperationAsCString ( BinaryOperation  operation)
constexpr

Definition at line 76 of file TypesUtils.hpp.

77 {
78  switch (operation)
79  {
80  case BinaryOperation::Add: return "Add";
81  case BinaryOperation::Div: return "Div";
82  case BinaryOperation::Maximum: return "Maximum";
83  case BinaryOperation::Minimum: return "Minimum";
84  case BinaryOperation::Mul: return "Mul";
85  case BinaryOperation::Power: return "Power";
86  case BinaryOperation::SqDiff: return "SqDiff";
87  case BinaryOperation::Sub: return "Sub";
88  default: return "Unknown";
89  }
90 }

References Add, Div, Maximum, Minimum, Mul, Power, SqDiff, and Sub.

Referenced by ExecuteFunction(), and StringifyLayerParameters< ElementwiseBinaryDescriptor >::Serialize().

◆ GetCapability() [1/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const armnn::BackendId &  backend 
)

Returns a BackendCapability if the backend lists the capability. The returned BackendCapability must then be inspected to check whether it is supported; if the capability is unlisted, an EmptyOptional is returned.

Definition at line 51 of file BackendHelper.cpp.

53 {
54  auto const& backendRegistry = armnn::BackendRegistryInstance();
55  if (backendRegistry.IsBackendRegistered(backend))
56  {
57  auto factoryFunc = backendRegistry.GetFactory(backend);
58  auto backendObject = factoryFunc();
59  auto capabilities = backendObject->GetCapabilities();
60  return GetCapability(backendCapabilityName, capabilities);
61  }
62  return EmptyOptional();
63 }

References BackendRegistryInstance(), and GetCapability().

◆ GetCapability() [2/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const BackendCapabilities &  capabilities 
)

Returns a BackendCapability if the backend lists the capability. The returned BackendCapability must then be inspected to check whether it is supported; if the capability is unlisted, an EmptyOptional is returned.

Definition at line 37 of file BackendHelper.cpp.

39 {
40  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
41  {
42  const auto& capability = capabilities.GetOption(i);
43  if (backendCapabilityName == capability.GetName())
44  {
45  return capability;
46  }
47  }
48  return EmptyOptional();
49 }

References BackendOptions::GetOption(), and BackendOptions::GetOptionCount().

Referenced by GetCapability(), HasCapability(), LayerSupportHandle::IsConvolution2dSupported(), LayerSupportHandle::IsDepthwiseConvolutionSupported(), LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(), and LayerSupportHandle::IsFullyConnectedSupported().
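
An illustrative sketch of querying a capability by name; "NonConstWeights" is used here only as an example of a name a backend may list, and the value check is a plausible pattern rather than prescribed usage:

    auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId("CpuAcc"));
    if (capability.has_value() && capability.value().GetValue().IsBool())
    {
        bool nonConstWeights = capability.value().GetValue().AsBool();
        // act on the reported capability value
    }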

◆ GetClContext()

const armnn::ClContext* armnn::GetClContext ( const void *  buf)
inline

Definition at line 140 of file ClContextSchema_generated.h.

140  {
141  return flatbuffers::GetRoot<armnn::ClContext>(buf);
142 }

Referenced by ClContextDeserializer::DeserializeFromBinary().

◆ GetComparisonOperationAsCString()

constexpr char const* armnn::GetComparisonOperationAsCString ( ComparisonOperation  operation)
constexpr

Definition at line 62 of file TypesUtils.hpp.

63 {
64  switch (operation)
65  {
66  case ComparisonOperation::Equal: return "Equal";
67  case ComparisonOperation::Greater: return "Greater";
68  case ComparisonOperation::GreaterOrEqual: return "GreaterOrEqual";
69  case ComparisonOperation::Less: return "Less";
70  case ComparisonOperation::LessOrEqual: return "LessOrEqual";
71  case ComparisonOperation::NotEqual: return "NotEqual";
72  default: return "Unknown";
73  }
74 }

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, and NotEqual.

Referenced by StringifyLayerParameters< ComparisonDescriptor >::Serialize().

◆ GetComputeDeviceAsCString()

constexpr char const* armnn::GetComputeDeviceAsCString ( Compute  compute)
constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 34 of file BackendId.hpp.

35 {
36  switch (compute)
37  {
38  case armnn::Compute::CpuRef: return "CpuRef";
39  case armnn::Compute::CpuAcc: return "CpuAcc";
40  case armnn::Compute::GpuAcc: return "GpuAcc";
41  default: return "Unknown";
42  }
43 }

References CpuAcc, CpuRef, and GpuAcc.

Referenced by BackendId::BackendId(), BackendId::IsCpuAcc(), BackendId::IsCpuRef(), BackendId::IsGpuAcc(), BackendId::IsUndefined(), and operator<<().

◆ GetConvolutionMethodString()

std::string GetConvolutionMethodString ( arm_compute::ConvolutionMethod &  convolutionMethod)
inline

Definition at line 46 of file ClWorkloadUtils.hpp.

47 {
48  switch (convolutionMethod)
49  {
50  case arm_compute::ConvolutionMethod::FFT:
51  return "FFT";
52  case arm_compute::ConvolutionMethod::DIRECT:
53  return "Direct";
54  case arm_compute::ConvolutionMethod::GEMM:
55  return "GEMM";
56  case arm_compute::ConvolutionMethod::WINOGRAD:
57  return "Winograd";
58  default:
59  return "Unknown";
60  }
61 }

◆ GetDataLayoutName()

◆ GetDataTypeName()

constexpr const char* armnn::GetDataTypeName ( DataType  dataType)
constexpr

Definition at line 233 of file TypesUtils.hpp.

234 {
235  switch (dataType)
236  {
237  case DataType::Float16: return "Float16";
238  case DataType::Float32: return "Float32";
239  case DataType::Signed64: return "Signed64";
240  case DataType::QAsymmU8: return "QAsymmU8";
241  case DataType::QAsymmS8: return "QAsymmS8";
242  case DataType::QSymmS8: return "QSymmS8";
243  case DataType::QSymmS16: return "QSymm16";
244  case DataType::Signed32: return "Signed32";
245  case DataType::Boolean: return "Boolean";
246  case DataType::BFloat16: return "BFloat16";
247 
248  default:
249  return "Unknown";
250  }
251 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by AttemptBackendAssignment(), RefDebugWorkload< DataType >::GetName(), armnnUtils::GetPerAxisParams(), ConstantLayer::SerializeLayerParameters(), armnnUtils::ToFloatArray(), and VerifyTensorInfoDataType().

◆ GetDataTypeSize()

constexpr unsigned int armnn::GetDataTypeSize ( DataType  dataType)
constexpr

Definition at line 182 of file TypesUtils.hpp.

183 {
184  switch (dataType)
185  {
186  case DataType::BFloat16:
187  case DataType::Float16: return 2U;
188  case DataType::Float32:
189  case DataType::Signed32: return 4U;
190  case DataType::Signed64: return 8U;
191  case DataType::QAsymmU8: return 1U;
192  case DataType::QAsymmS8: return 1U;
193  case DataType::QSymmS8: return 1U;
194  case DataType::QSymmS16: return 2U;
195  case DataType::Boolean: return 1U;
196  default: return 0U;
197  }
198 }

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by TosaRefTensorHandle::CanBeImported(), RefTensorHandle::CanBeImported(), TensorInfo::GetNumBytes(), GetUnpaddedTensorStrides(), PermuteTensor(), and armnn_driver::SwizzleAndroidNn4dTensorToArmNn().
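
Because the function is constexpr, element sizes can be checked at compile time, for instance:

    static_assert(armnn::GetDataTypeSize(armnn::DataType::Float16)  == 2, "Float16 is 2 bytes");
    static_assert(armnn::GetDataTypeSize(armnn::DataType::QAsymmU8) == 1, "QAsymmU8 is 1 byte");
    static_assert(armnn::GetDataTypeSize(armnn::DataType::Signed64) == 8, "Signed64 is 8 bytes");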

◆ GetEventPtr() [1/2]

const Event* armnn::GetEventPtr ( const Event *  ptr)

Definition at line 109 of file Profiling.cpp.

109 { return ptr;}

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ GetEventPtr() [2/2]

const Event* armnn::GetEventPtr ( const std::unique_ptr< Event > &  ptr)

Definition at line 110 of file Profiling.cpp.

110 {return ptr.get(); }

◆ GetFusedName()

void armnn::GetFusedName ( Layer *  layerList[4],
std::string &  fusedName 
)
inline

Definition at line 71 of file NeonBackendOptimizationUtils.hpp.

72 {
73  // Build the fused name string
74  fusedName = "fused";
75  for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76  {
77  if (! layerList[layerIdx])
78  {
79  break;
80  }
81  fusedName += "-";
82  fusedName += layerList[layerIdx]->GetNameStr();
83  }
84 }

References Layer::GetNameStr().

Referenced by NeonBackend::OptimizeSubgraphView().

◆ GetFusedTypeAsCString()

constexpr char const* armnn::GetFusedTypeAsCString ( FusedKernelType  type)
constexpr

Definition at line 119 of file TypesUtils.hpp.

120 {
121  switch (type)
122  {
123  case FusedKernelType::AddMulAdd: return "AddMulAdd";
124  default: return "Unknown";
125  }
126 }

References AddMulAdd.

Referenced by StringifyLayerParameters< FusedDescriptor >::Serialize().

◆ GetGraphForTesting()

Graph & GetGraphForTesting ( IOptimizedNetwork *  optNet)

Definition at line 49 of file TestUtils.cpp.

50 {
51  return optNet->pOptimizedNetworkImpl->GetGraph();
52 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetILayerSupportByBackendId()

LayerSupportHandle GetILayerSupportByBackendId ( const armnn::BackendId &  backend)

Convenience function to retrieve the ILayerSupportHandle for a backend.

Definition at line 23 of file BackendHelper.cpp.

24 {
25  BackendRegistry& backendRegistry = armnn::BackendRegistryInstance();
26 
27  if (!backendRegistry.IsBackendRegistered(backend))
28  {
29  return LayerSupportHandle(nullptr);
30  }
31 
32  auto factoryFunc = backendRegistry.GetFactory(backend);
33  auto backendObject = factoryFunc();
34  return LayerSupportHandle(backendObject->GetLayerSupport(), backend);
35 }

References BackendRegistryInstance(), BackendRegistry::GetFactory(), and BackendRegistry::IsBackendRegistered().
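
An illustrative sketch (not prescriptive): obtain a handle for a backend and confirm it is valid before asking layer-support questions:

    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId("CpuRef");
    if (handle.IsBackendRegistered())
    {
        // call the handle's Is<Layer>Supported(...) queries here
    }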

◆ GetInputTensor()

const armnn::ConstTensor armnn::GetInputTensor ( const LayerBindingId  layerId,
const InputTensors &  inputTensors 
)

Definition at line 1404 of file LoadedNetwork.cpp.

1405 {
1406  for (auto inputTensorPair : inputTensors)
1407  {
1408  LayerBindingId id = inputTensorPair.first;
1409  if (id == layerId)
1410  {
1411  return inputTensorPair.second;
1412  }
1413  }
1414  throw InvalidArgumentException("Input does not exist.");
1415 }

◆ GetInputTensorData()

const DataType* armnn::GetInputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 42 of file RefWorkloadUtils.hpp.

43 {
44  const ITensorHandle* tensorHandle = data.m_Inputs[idx];
45  return reinterpret_cast<const DataType*>(tensorHandle->Map());
46 }

References ITensorHandle::Map().

◆ GetInputTensorDataBFloat16()

const BFloat16* armnn::GetInputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 86 of file RefWorkloadUtils.hpp.

87 {
88  return GetInputTensorData<BFloat16>(idx, data);
89 }

◆ GetInputTensorDataFloat()

const float* armnn::GetInputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 62 of file RefWorkloadUtils.hpp.

63 {
64  return GetInputTensorData<float>(idx, data);
65 }

◆ GetInputTensorDataHalf()

const Half* armnn::GetInputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 74 of file RefWorkloadUtils.hpp.

75 {
76  return GetInputTensorData<Half>(idx, data);
77 }

◆ GetLayerInOutDatatype()

std::vector<DataType> armnn::GetLayerInOutDatatype ( const Layer *  layer)
inline

Definition at line 1020 of file Network.cpp.

1021 {
1022  DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
1023  layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
1024  DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
1025  layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
1026  return {dataTypeIn, dataTypeOut};
1027 }

References Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetNumInputSlots(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), and OutputSlot::GetTensorInfo().

Referenced by AssignBackends(), and AssignBackendsIConnectable().

◆ GetLayerTypeAsCString()

◆ GetLogicalBinaryOperationAsCString()

constexpr char const* armnn::GetLogicalBinaryOperationAsCString ( LogicalBinaryOperation  operation)
constexpr

Definition at line 109 of file TypesUtils.hpp.

110 {
111  switch (operation)
112  {
113  case LogicalBinaryOperation::LogicalAnd: return "LogicalAnd";
114  case LogicalBinaryOperation::LogicalOr: return "LogicalOr";
115  default: return "Unknown";
116  }
117 }

References LogicalAnd, and LogicalOr.

◆ GetMemBlockStrategyTypeName()

constexpr const char* armnn::GetMemBlockStrategyTypeName ( MemBlockStrategyType  memBlockStrategyType)
constexpr

Definition at line 295 of file TypesUtils.hpp.

296 {
297  switch (memBlockStrategyType)
298  {
299  case MemBlockStrategyType::SingleAxisPacking: return "SingleAxisPacking";
300  case MemBlockStrategyType::MultiAxisPacking: return "MultiAxisPacking";
301  default: return "Unknown";
302  }
303 }

References MultiAxisPacking, and SingleAxisPacking.

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategy()

std::unique_ptr<IMemoryOptimizerStrategy> armnn::GetMemoryOptimizerStrategy ( const std::string &  strategyName)
inline

Definition at line 36 of file MemoryOptimizerStrategyLibrary.hpp.

37 {
38  const auto& strategyFactoryMap = GetStrategyFactories();
39  auto strategyFactory = strategyFactoryMap.find(strategyName);
40  if (strategyFactory != GetStrategyFactories().end())
41  {
42  return strategyFactory->second->CreateMemoryOptimizerStrategy();
43  }
44  return nullptr;
45 }

Referenced by RuntimeImpl::RuntimeImpl().

◆ GetMemoryOptimizerStrategyNames()

const std::vector<std::string> armnn::GetMemoryOptimizerStrategyNames ( )
inline

Definition at line 47 of file MemoryOptimizerStrategyLibrary.hpp.

48 {
49  const auto& strategyFactoryMap = GetStrategyFactories();
50  std::vector<std::string> strategyNames;
51  for (const auto& strategyFactory : strategyFactoryMap)
52  {
53  strategyNames.emplace_back(strategyFactory.first);
54  }
55  return strategyNames;
56 }
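
A brief caller-side sketch combining this function with GetMemoryOptimizerStrategy() above (assumed usage; the strategy name is only an example, and the lookup returns nullptr for unknown names):

    // Assumes <iostream>, <memory> and the MemoryOptimizerStrategyLibrary header.
    for (const auto& name : armnn::GetMemoryOptimizerStrategyNames())
    {
        std::cout << name << "\n";                      // list every registered strategy
    }
    auto strategy = armnn::GetMemoryOptimizerStrategy("SingleAxisPriorityList"); // example name
    if (!strategy)
    {
        // Unknown name: fall back to the runtime's default memory handling.
    }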

◆ GetModelOptionsForTesting()

ModelOptions & GetModelOptionsForTesting ( IOptimizedNetwork *  optNet)

Definition at line 54 of file TestUtils.cpp.

55 {
56  return optNet->pOptimizedNetworkImpl->GetModelOptions();
57 }

References IOptimizedNetwork::pOptimizedNetworkImpl.

◆ GetNormalizationAlgorithmChannelAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmChannelAsCString ( NormalizationAlgorithmChannel  channel)
constexpr

Definition at line 265 of file TypesUtils.hpp.

266 {
267  switch (channel)
268  {
269  case NormalizationAlgorithmChannel::Across: return "Across";
270  case NormalizationAlgorithmChannel::Within: return "Within";
271  default: return "Unknown";
272  }
273 }

References Across, and Within.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNormalizationAlgorithmMethodAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmMethodAsCString ( NormalizationAlgorithmMethod  method)
constexpr

Definition at line 275 of file TypesUtils.hpp.

276 {
277  switch (method)
278  {
279  case NormalizationAlgorithmMethod::LocalBrightness: return "LocalBrightness";
280  case NormalizationAlgorithmMethod::LocalContrast: return "LocalContrast";
281  default: return "Unknown";
282  }
283 }

References LocalBrightness, and LocalContrast.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

◆ GetNumActivations()

unsigned int armnn::GetNumActivations ( const TensorInfo &  inputInfo)

Definition at line 16 of file RefFullyConnectedWorkload.cpp.

17 {
18  unsigned int numActivations = 1; // Total number of activations in the input.
19  for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
20  {
21  numActivations *= inputInfo.GetShape()[i];
22  }
23  return numActivations;
24 }

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

◆ GetNumberOfCacheFiles()

unsigned int GetNumberOfCacheFiles ( const armnn::BackendId &  backend)

Returns the number of cached files if the backend supports caching.

Definition at line 130 of file BackendHelper.cpp.

131 {
132  auto const& backendRegistry = armnn::BackendRegistryInstance();
133  if (backendRegistry.IsBackendRegistered(backend))
134  {
135  auto factoryFunc = backendRegistry.GetFactory(backend);
136  auto backendObject = factoryFunc();
137  return backendObject->GetNumberOfCacheFiles();
138  }
139  return 0;
140 }

References BackendRegistryInstance().

Referenced by ArmnnDriver::getNumberOfCacheFilesNeeded(), ArmnnDriverImpl::PrepareArmnnModel(), and ArmnnDriverImpl::PrepareArmnnModelFromCache().
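
A short usage sketch (the backend id is an example; a result of 0 means the backend is either not registered or does not support model caching):

    // Assumes <armnn/BackendHelper.hpp>.
    armnn::BackendId backend("GpuAcc");
    unsigned int numCacheFiles = armnn::GetNumberOfCacheFiles(backend);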

◆ GetNumInputs()

uint32_t armnn::GetNumInputs ( bool  biasEnabled)

Definition at line 454 of file Descriptors.cpp.

455 {
456  unsigned int numInputs = 2;
457  if (biasEnabled)
458  {
459  numInputs = 3;
460  }
461  return numInputs;
462 }

Referenced by FullyConnectedDescriptor::GetNumInputs(), Convolution2dDescriptor::GetNumInputs(), Convolution3dDescriptor::GetNumInputs(), and DepthwiseConvolution2dDescriptor::GetNumInputs().

◆ GetOffset()

unsigned int armnn::GetOffset ( const TensorShape &  shape,
unsigned int  b,
unsigned int  h,
unsigned int  w,
unsigned int  c,
const DataLayoutIndexed &  dataLayout 
)

Definition at line 15 of file SpaceToBatchNd.cpp.

21 {
22  // 3D Tensors
23  unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
24  if (shape.GetNumDimensions() == 3)
25  {
26  return (b * shape[dataLayout.GetHeightIndex()] + h) * shape[channelDimension3D] + c;
27  }
28  // 4D Tensors
29  else if (shape.GetNumDimensions() == 4)
30  {
31  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
32  {
33  return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
34  shape[dataLayout.GetChannelsIndex()] + c;
35  }
36  else
37  {
38  return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
39  shape[dataLayout.GetWidthIndex()] + w;
40  }
41  }
42  else
43  {
44  throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
45  }
46 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by SpaceToBatchNd(), and SpaceToDepth().
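
A worked example of the indexing above (values are illustrative):

    // NHWC tensor of shape [N=2, H=4, W=5, C=3]:
    //   offset(b, h, w, c) = ((b*H + h)*W + w)*C + c
    //   offset(1, 2, 3, 1) = ((1*4 + 2)*5 + 3)*3 + 1 = 100
    // Same logical shape stored as NCHW ([N, C, H, W] = [2, 3, 4, 5]):
    //   offset(b, h, w, c) = ((b*C + c)*H + h)*W + w
    //   offset(1, 2, 3, 1) = ((1*3 + 1)*4 + 2)*5 + 3 = 93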

◆ GetOutputShapeRoundingAsCString()

constexpr char const* armnn::GetOutputShapeRoundingAsCString ( OutputShapeRounding  rounding)
constexpr

Definition at line 139 of file TypesUtils.hpp.

140 {
141  switch (rounding)
142  {
143  case OutputShapeRounding::Ceiling: return "Ceiling";
144  case OutputShapeRounding::Floor: return "Floor";
145  default: return "Unknown";
146  }
147 }

References Ceiling, and Floor.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetOutputTensor()

const armnn::Tensor armnn::GetOutputTensor ( const LayerBindingId  layerId,
const OutputTensors &  outputTensors 
)

Definition at line 1417 of file LoadedNetwork.cpp.

1418 {
1419  for (auto outputTensorPair : outputTensors)
1420  {
1421  LayerBindingId id = outputTensorPair.first;
1422  if (id == layerId)
1423  {
1424  return outputTensorPair.second;
1425  }
1426  }
1427  throw InvalidArgumentException("Output does not exist.");
1428 }

◆ GetOutputTensorData() [1/2]

DataType* armnn::GetOutputTensorData ( ITensorHandle *  tensorHandle)

Definition at line 56 of file RefWorkloadUtils.hpp.

57 {
58  return reinterpret_cast<DataType*>(tensorHandle->Map());
59 }

References ITensorHandle::Map().

◆ GetOutputTensorData() [2/2]

DataType * GetOutputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 181 of file ClWorkloadUtils.hpp.

182 {
183  ITensorHandle* tensorHandle = data.m_Outputs[idx];
184  return reinterpret_cast<DataType*>(tensorHandle->Map());
185 }

References ITensorHandle::Map().

◆ GetOutputTensorDataBFloat16()

BFloat16* armnn::GetOutputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 92 of file RefWorkloadUtils.hpp.

93 {
94  return GetOutputTensorData<BFloat16>(idx, data);
95 }

◆ GetOutputTensorDataFloat()

float* armnn::GetOutputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 68 of file RefWorkloadUtils.hpp.

69 {
70  return GetOutputTensorData<float>(idx, data);
71 }

◆ GetOutputTensorDataHalf()

Half* armnn::GetOutputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 80 of file RefWorkloadUtils.hpp.

81 {
82  return GetOutputTensorData<Half>(idx, data);
83 }

◆ GetPaddingMethodAsCString()

constexpr char const* armnn::GetPaddingMethodAsCString ( PaddingMethod  method)
constexpr

Definition at line 149 of file TypesUtils.hpp.

150 {
151  switch (method)
152  {
153  case PaddingMethod::Exclude: return "Exclude";
154  case PaddingMethod::IgnoreValue: return "IgnoreValue";
155  default: return "Unknown";
156  }
157 }

References Exclude, and IgnoreValue.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetPaddingModeAsCString()

constexpr char const* armnn::GetPaddingModeAsCString ( PaddingMode  mode)
constexpr

Definition at line 159 of file TypesUtils.hpp.

160 {
161  switch (mode)
162  {
163  case PaddingMode::Constant: return "Constant";
164  case PaddingMode::Symmetric: return "Symmetric";
165  case PaddingMode::Reflect: return "Reflect";
166  default: return "Unknown";
167  }
168 }

References Constant, Reflect, and Symmetric.

Referenced by StringifyLayerParameters< PadDescriptor >::Serialize().

◆ GetPoolingAlgorithmAsCString()

constexpr char const* armnn::GetPoolingAlgorithmAsCString ( PoolingAlgorithm  pooling)
constexpr

Definition at line 128 of file TypesUtils.hpp.

129 {
130  switch (pooling)
131  {
132  case PoolingAlgorithm::Average: return "Average";
133  case PoolingAlgorithm::Max: return "Max";
134  case PoolingAlgorithm::L2: return "L2";
135  default: return "Unknown";
136  }
137 }

References Average, L2, and Max.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

◆ GetProfilingService()

arm::pipe::IProfilingService & GetProfilingService ( armnn::RuntimeImpl *  runtime)

Definition at line 59 of file TestUtils.cpp.

60 {
61  return *(runtime->m_ProfilingService.get());
62 }

◆ GetReduceOperationAsCString()

constexpr char const* armnn::GetReduceOperationAsCString ( ReduceOperation  reduce_operation)
constexpr

Definition at line 170 of file TypesUtils.hpp.

171 {
172  switch (reduce_operation)
173  {
174  case ReduceOperation::Sum: return "Sum";
175  case ReduceOperation::Max: return "Max";
176  case ReduceOperation::Mean: return "Mean";
177  case ReduceOperation::Min: return "Min";
178  case ReduceOperation::Prod: return "Prod";
179  default: return "Unknown";
180  }
181 }

References Max, Mean, Min, Prod, and Sum.

Referenced by StringifyLayerParameters< ReduceDescriptor >::Serialize().

◆ GetResizeMethodAsCString()

constexpr const char* armnn::GetResizeMethodAsCString ( ResizeMethod  method)
constexpr

Definition at line 285 of file TypesUtils.hpp.

286 {
287  switch (method)
288  {
289  case ResizeMethod::Bilinear: return "Bilinear";
290  case ResizeMethod::NearestNeighbor: return "NearestNeighbour";
291  default: return "Unknown";
292  }
293 }

References Bilinear, and NearestNeighbor.

Referenced by StringifyLayerParameters< ResizeDescriptor >::Serialize().

◆ GetSizePrefixedClContext()

const armnn::ClContext* armnn::GetSizePrefixedClContext ( const void *  buf)
inline

Definition at line 144 of file ClContextSchema_generated.h.

144  {
145  return flatbuffers::GetSizePrefixedRoot<armnn::ClContext>(buf);
146 }

◆ GetStatusAsCString()

constexpr char const* armnn::GetStatusAsCString ( Status  status)
constexpr

Definition at line 21 of file TypesUtils.hpp.

22 {
23  switch (status)
24  {
25  case armnn::Status::Success: return "Status::Success";
26  case armnn::Status::Failure: return "Status::Failure";
27  default: return "Unknown";
28  }
29 }

References Failure, and Success.

Referenced by operator<<().

◆ GetTensorInfo()

const TensorInfo& armnn::GetTensorInfo ( const ITensorHandle *  tensorHandle)
inline


Definition at line 33 of file RefWorkloadUtils.hpp.

34 {
35  // We know that reference workloads use RefTensorHandles for inputs and outputs
36  const TensorHandleType* refTensorHandle =
37  PolymorphicDowncast<const TensorHandleType*>(tensorHandle);
38  return refTensorHandle->GetTensorInfo();
39 }

Referenced by BatchNormImpl(), Concatenate(), ExecuteFunction(), Split(), Splitter(), FillLayer::ValidateTensorShapesFromInputs(), SwitchLayer::ValidateTensorShapesFromInputs(), ConstantLayer::ValidateTensorShapesFromInputs(), DetectionPostProcessLayer::ValidateTensorShapesFromInputs(), SplitterLayer::ValidateTensorShapesFromInputs(), LstmLayer::ValidateTensorShapesFromInputs(), QuantizedLstmLayer::ValidateTensorShapesFromInputs(), and QLstmLayer::ValidateTensorShapesFromInputs().

◆ GetTimeDuration()

std::chrono::duration<double, std::milli> armnn::GetTimeDuration ( std::chrono::high_resolution_clock::time_point  start_time)
inline

Definition at line 19 of file Timer.hpp.

21 {
22  return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
23 }

References GetTimeNow().

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), and RuntimeImpl::~RuntimeImpl().

◆ GetTimeNow()

std::chrono::high_resolution_clock::time_point armnn::GetTimeNow ( )
inline

Definition at line 14 of file Timer.hpp.

15 {
16  return std::chrono::high_resolution_clock::now();
17 }

Referenced by RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), GetTimeDuration(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().
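
A small timing sketch using GetTimeNow() and GetTimeDuration() together (they are thin wrappers over std::chrono; DoSomeWork() is a placeholder for the code being timed):

    // Assumes <iostream> and the internal Timer.hpp header.
    const auto start = armnn::GetTimeNow();
    DoSomeWork();                                            // placeholder
    const auto elapsed = armnn::GetTimeDuration(start);      // duration<double, std::milli>
    std::cout << "Took " << elapsed.count() << " ms\n";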

◆ GetUnaryOperationAsCString()

constexpr char const* armnn::GetUnaryOperationAsCString ( UnaryOperation  operation)
constexpr

Definition at line 92 of file TypesUtils.hpp.

93 {
94  switch (operation)
95  {
96  case UnaryOperation::Abs: return "Abs";
97  case UnaryOperation::Ceil: return "Ceil";
98  case UnaryOperation::Exp: return "Exp";
99  case UnaryOperation::Sqrt: return "Sqrt";
100  case UnaryOperation::Rsqrt: return "Rsqrt";
101  case UnaryOperation::Neg: return "Neg";
102  case UnaryOperation::Log: return "Log";
103  case UnaryOperation::LogicalNot: return "LogicalNot";
104  case UnaryOperation::Sin: return "Sin";
105  default: return "Unknown";
106  }
107 }

References Abs, Ceil, Exp, Log, LogicalNot, Neg, Rsqrt, Sin, and Sqrt.

Referenced by StringifyLayerParameters< ElementwiseUnaryDescriptor >::Serialize().

◆ GetUnpaddedTensorStrides()

TensorShape GetUnpaddedTensorStrides ( const TensorInfo &  tensorInfo)

Definition at line 15 of file TensorHandle.cpp.

16 {
17  TensorShape shape(tensorInfo.GetShape());
18  auto size = GetDataTypeSize(tensorInfo.GetDataType());
19  auto runningSize = size;
20  std::vector<unsigned int> strides(shape.GetNumDimensions());
21  auto lastIdx = shape.GetNumDimensions()-1;
22  for (unsigned int i=0; i < lastIdx ; i++)
23  {
24  strides[lastIdx-i] = runningSize;
25  runningSize *= shape[lastIdx-i];
26  }
27  strides[0] = runningSize;
28  return TensorShape(shape.GetNumDimensions(), strides.data());
29 }

References TensorInfo::GetDataType(), GetDataTypeSize(), TensorShape::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by TosaRefTensorHandle::GetStrides(), SampleTensorHandle::GetStrides(), RefTensorHandle::GetStrides(), ConstTensorHandle::GetStrides(), and RefTensorHandleDecorator::GetStrides().
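
A worked example (strides are returned in bytes, outermost dimension first):

    // Float32 tensor of shape [2, 3, 4], element size 4 bytes:
    //   stride[2] = 4            one element
    //   stride[1] = 4 * 4  = 16  one row of 4 elements
    //   stride[0] = 16 * 3 = 48  one 3x4 plane
    armnn::TensorInfo info({2, 3, 4}, armnn::DataType::Float32);
    armnn::TensorShape strides = armnn::GetUnpaddedTensorStrides(info);   // {48, 16, 4}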

◆ GetVersion()

const std::string GetVersion ( )

Definition at line 77 of file Utils.cpp.

78 {
79  return ARMNN_VERSION;
80 }

References ARMNN_VERSION.

◆ GpuFsaActivationCreateOp()

void GpuFsaActivationCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ActivationDescriptor &  descriptor 
)

Definition at line 58 of file GpuFsaActivation.cpp.

61 {
62  GpuWorkloadSketch* sketch = blob->sketch.get();
63  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
64  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
65  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
66 
67  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
68 
69  aclInput0Info.set_are_values_constant(input.IsConstant());
70 
71  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
72 
73  // Validate operator, check status and update reasonIfUnsupported
74  arm_compute::Status aclStatus{};
75  switch (descriptor.m_Function)
76  {
77  case ActivationFunction::TanH:
78  {
79  aclStatus = GpuTanh::validate_op(*sketch, inputTensorInfos[0]);
80  break;
81  }
82  case ActivationFunction::Sigmoid:
83  {
84  aclStatus = GpuSigmoid::validate_op(*sketch, inputTensorInfos[0]);
85  break;
86  }
87  default:
88  throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
89  + GetActivationFunctionAsCString(descriptor.m_Function));
90 
91  }
92  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
93  if (!supported)
94  {
95  throw BackendCapabilityException("\"GpuFsa\" backend failed during Activation layer validation");
96  }
97 
98  arm_compute::ITensorInfo* activationOutputInfo{};
99  switch (descriptor.m_Function)
100  {
101  case ActivationFunction::TanH:
102  {
103  activationOutputInfo = GpuTanh::create_op(*sketch, inputTensorInfos[0]);
104  break;
105  }
106  case ActivationFunction::Sigmoid:
107  {
108  activationOutputInfo = GpuSigmoid::create_op(*sketch, inputTensorInfos[0]);
109  break;
110  }
111  default:
112  throw InvalidArgumentException(std::string("Activation function currently not supported in GpuFsa: ")
113  + GetActivationFunctionAsCString(descriptor.m_Function));
114 
115  }
116 
117  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
118  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
119  GpuOutput::create_op(*sketch, activationOutputInfo, outputTensorInfos[0]);
120 
121  // Store the TensorInfos within the blob as unique_ptrs to be used later
122  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
123  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
124 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaActivationValidate()

arm_compute::Status GpuFsaActivationValidate ( const TensorInfo &  input,
const ActivationDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaActivation.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32 
33  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
34 
35  switch (descriptor.m_Function)
36  {
37  case ActivationFunction::TanH:
38  {
39  if ( descriptor.m_A != 1 || descriptor.m_B != 1)
40  {
41  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
42  "Activation function TanH only works with a=1 and b=1");
43  }
44  return GpuTanh::validate_op(sketch, inputInfo);
45  }
46  case ActivationFunction::Sigmoid:
47  {
48  return GpuSigmoid::validate_op(sketch, inputInfo);
49  }
50  default:
51  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
52  std::string("Activation function currently not supported in GpuFsa: ")
53  + GetActivationFunctionAsCString(descriptor.m_Function));
54  }
55 
56 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().
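
A hedged support-query sketch (this helper is internal to the GpuFsa backend; the tensor shape is an assumption for the example):

    armnn::TensorInfo input({1, 16}, armnn::DataType::Float32);
    armnn::ActivationDescriptor desc;
    desc.m_Function = armnn::ActivationFunction::Sigmoid;    // TanH additionally requires m_A == 1 and m_B == 1
    arm_compute::Status status = armnn::GpuFsaActivationValidate(input, desc);
    const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);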

◆ GpuFsaBackendId()

constexpr const char* armnn::GpuFsaBackendId ( )
constexpr

Definition at line 10 of file GpuFsaBackendId.hpp.

10 { return "GpuFsa"; }

Referenced by GpuFsaBackend::GetIdStatic().

◆ GpuFsaBatchMatMulCreateOp()

void GpuFsaBatchMatMulCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input0,
const TensorInfo &  input1,
const BatchMatMulDescriptor &  descriptor 
)

Definition at line 51 of file GpuFsaBatchMatMul.cpp.

55 {
56  GpuWorkloadSketch* sketch = blob->sketch.get();
57  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
58  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
59  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
60 
61  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
62  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
63 
64  aclInput0Info.set_are_values_constant(input0.IsConstant());
65  aclInput1Info.set_are_values_constant(input1.IsConstant());
66 
67  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
68  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
69 
70  MatMulAttributes matMulAttributes{};
71  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
72  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
73  GpuMatMulSettings matmulSettings{};
74  matmulSettings.m0(1);
75  matmulSettings.n0(1);
76  matmulSettings.k0(1);
77 
78  // Validate operator, check status and update reasonIfUnsupported
79  arm_compute::Status aclStatus = GpuMatMul::validate_op(*sketch,
80  inputTensorInfos[0],
81  inputTensorInfos[1],
82  matMulAttributes,
83  matmulSettings);
84 
85  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
86  if (!supported)
87  {
88  throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary add validation");
89  }
90 
91  arm_compute::ITensorInfo* addOutputInfo = GpuMatMul::create_op(*sketch,
92  inputTensorInfos[0],
93  inputTensorInfos[1],
94  matMulAttributes,
95  matmulSettings);
96 
97  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
98  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
99  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
100 
101  // Store the TensorInfos within the blob as unique_ptrs to be used later
102  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
103  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
104 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaBatchMatMulValidate()

arm_compute::Status GpuFsaBatchMatMulValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const BatchMatMulDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaBatchMatMul.cpp.

25 {
26  // Create a new workload sketch, for validation purposes
27  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
28  auto workloadContext = GpuWorkloadContext(&compileCtx);
29  GpuWorkloadSketch sketch{ &workloadContext };
30 
31  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
32  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
33 
34  aclInput0Info.set_are_values_constant(input0.IsConstant());
35  aclInput1Info.set_are_values_constant(input1.IsConstant());
36 
37  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
38  arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
39 
40  MatMulAttributes matMulAttributes{};
41  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
42  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
43  GpuMatMulSettings matmulSettings{};
44  matmulSettings.m0(1);
45  matmulSettings.n0(1);
46  matmulSettings.k0(1);
47 
48  return GpuMatMul::validate_op(sketch, inputInfo0, inputInfo1, matMulAttributes, matmulSettings);
49 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaCastCreateOp()

void GpuFsaCastCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 61 of file GpuFsaCast.cpp.

64 {
65  using namespace armcomputetensorutils;
66 
67  GpuWorkloadSketch* sketch = blob->sketch.get();
68  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
69  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
70  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
71 
72  arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
73 
74  aclinputInfo.set_are_values_constant(input.IsConstant());
75 
76  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclinputInfo));
77 
78  CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
79 
80  // Validate operator, check status and update reasonIfUnsupported
81  arm_compute::Status aclStatus = GpuCast::validate_op(*sketch, inputTensorInfos[0], cast_attr);
82  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
83  if (!validated)
84  {
85  throw BackendCapabilityException("\"" + std::string(GpuFsaBackendId())
86  + "\" backend failed during cast operator validation");
87  }
88 
89  arm_compute::ITensorInfo* castOutputInfo =
90  GpuCast::create_op(*sketch, inputTensorInfos[0], cast_attr);
91 
92  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
93  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
94  GpuOutput::create_op(*sketch, castOutputInfo, outputTensorInfos[0]);
95 
96  // Store the TensorInfos within the blob as unique_ptrs to be used later
97  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
98  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
99 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaCastValidate()

arm_compute::Status GpuFsaCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 33 of file GpuFsaCast.cpp.

34 {
35  using namespace armcomputetensorutils;
36 
37  // Create a new workload sketch, for validation purposes
38  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
39  auto workloadContext = GpuWorkloadContext(&compileCtx);
40  GpuWorkloadSketch sketch{ &workloadContext };
41 
42  arm_compute::TensorInfo aclinputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
43 
44  aclinputInfo.set_are_values_constant(input.IsConstant());
45 
46  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclinputInfo);
47 
48  CastAttributes cast_attr = CastAttributesFromTensorInfo(output);
49 
50  arm_compute::Status aclStatus = GpuCast::validate_op(sketch, inputInfo0, cast_attr);
51 #ifndef NDEBUG
52  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
53  if (!validated)
54  {
55  std::cout << "GpuFsaCastValidate failed: " << aclStatus.error_description() << std::endl;
56  }
57 #endif
58  return aclStatus;
59 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaConstantWorkloadValidate()

arm_compute::Status GpuFsaConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 17 of file GpuFsaConstantWorkload.cpp.

18 {
19  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  std::array<arm_compute::DataType,8> supportedTypes = {
22  arm_compute::DataType::F16,
23  arm_compute::DataType::F32,
24  arm_compute::DataType::QASYMM8,
25  arm_compute::DataType::QASYMM8_SIGNED,
26  arm_compute::DataType::QSYMM16,
27  arm_compute::DataType::QSYMM8,
28  arm_compute::DataType::QSYMM8_PER_CHANNEL,
29  arm_compute::DataType::S32
30  };
31  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
32 
33  if (it != end(supportedTypes))
34  {
35  return arm_compute::Status{};
36  }
37  else
38  {
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
40  }
41 }

◆ GpuFsaConvolution2dCreateOp()

void GpuFsaConvolution2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 70 of file GpuFsaConvolution2d.cpp.

75 {
76 /*
77  * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
78  * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
79  * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
80  * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
81  * using a single sketch.
82  * The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
83  * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
84  * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
85  * doesn't know which Tensors to use.
86  */
87  GpuWorkloadSketch* sketch = blob->sketch.get();
88  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
89  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
90  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
91 
92  // Build and create tensor infos using the sketch
93  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
94  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
95  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
96 
97  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
98  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
99 
100  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op / create_op
101  arm_compute::TensorInfo aclBiasInfo;
102  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
103 
104  if (descriptor.m_BiasEnabled)
105  {
106  if(!biases.has_value())
107  {
108  throw InvalidArgumentException("GpuFsaConvolution2d::CreateOp: No biases set when biases are enabled");
109  }
110  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
111  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
112 
113  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
114  biasSketchInfoPtr = inputTensorInfos[2];
115  }
116 
117  Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
118 
119  // Validate operator, check status and update reasonIfUnsupported
120  arm_compute::Status aclStatus = GpuConv2d::validate_op(*sketch,
121  inputTensorInfos[0],
122  inputTensorInfos[1],
123  biasSketchInfoPtr,
124  conv2dAttributes);
125 
126  const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
127  if (!supported)
128  {
129  throw BackendCapabilityException("\"GpuFsa\" backend failed during Convolution2D operation validation");
130  }
131 
132  // Create the Op within the Sketch using the TensorInfos we have stored
133  arm_compute::ITensorInfo* convOutInfo = GpuConv2d::create_op(*sketch,
134  inputTensorInfos[0],
135  inputTensorInfos[1],
136  biasSketchInfoPtr,
137  conv2dAttributes);
138 
139  // Create the Output
140  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
141  GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
142 
143  // Store the TensorInfos within the blob as unique_ptrs to be used later
144  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
145  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
146 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaConvolution2dValidate()

arm_compute::Status GpuFsaConvolution2dValidate ( const TensorInfo &  input,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 24 of file GpuFsaConvolution2d.cpp.

28 {
29  // Create a new workload sketch, for validation purposes
30  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
31  auto workloadContext = GpuWorkloadContext(&compileCtx);
32  GpuWorkloadSketch sketch{ &workloadContext };
33 
34  // Build and create tensor infos using the sketch
35  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
36  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
37  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
38 
39  auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
40  auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
41 
42  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
43  arm_compute::TensorInfo aclBiasInfo;
44  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
45 
46  if (descriptor.m_BiasEnabled)
47  {
48  if(!biases.has_value())
49  {
50  throw InvalidArgumentException("GpuFsaConvolution2d::ValidateOp: No biases set when biases are enabled");
51  }
52  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
53  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
54 
55  biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
56  }
57 
58  Conv2dAttributes conv2dAttributes = CreateConv2dAttributes(descriptor);
59 
60  // Validate operator, check status and update reasonIfUnsupported
61  arm_compute::Status aclStatus = GpuConv2d::validate_op(sketch,
62  inputInfo,
63  weightInfo,
64  biasSketchInfoPtr,
65  conv2dAttributes);
66 
67  return aclStatus;
68 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().
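
A hedged sketch of a support query for a simple NHWC convolution (shapes and descriptor values are assumptions for the example):

    armnn::TensorInfo input({1, 8, 8, 3}, armnn::DataType::Float32);
    armnn::TensorInfo weights({16, 3, 3, 3}, armnn::DataType::Float32, 0.0f, 0, true);  // constant weights
    armnn::Convolution2dDescriptor desc;
    desc.m_DataLayout  = armnn::DataLayout::NHWC;
    desc.m_StrideX     = 1;
    desc.m_StrideY     = 1;
    desc.m_BiasEnabled = false;
    arm_compute::Status status =
        armnn::GpuFsaConvolution2dValidate(input, desc, weights, armnn::EmptyOptional());
    const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);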

◆ GpuFsaDepthwiseConvolution2dCreateOp()

void GpuFsaDepthwiseConvolution2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 89 of file GpuFsaDepthwiseConvolution2d.cpp.

94 {
95 /*
96 * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
97 * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
98 * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
99 * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
100 * using a single sketch.
101 * The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
102 * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
103 * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
104 * doesn't know which Tensors to use.
105 */
106  GpuWorkloadSketch* sketch = blob->sketch.get();
107  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
108  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
109  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
110 
111  // Build and create tensor infos using the sketch
112  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
113 
114  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
115  //
116  // ACL format for weights for depthwise is:
117  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
118  // - [1, C, H, W] for [N, C, H, W] input/output layout
119  //
120  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
121  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
122  // so we do the permute here for the TensorInfo weights.
123  unsigned int aclDepthMultiplier;
124  TensorInfo weightsPermuted;
125  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
126  auto weightsShape = weightsPermuted.GetShape();
127  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
128 
129  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
130  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
131 
132  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
133  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
134 
135  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
136  arm_compute::TensorInfo aclBiasInfo;
137  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
138 
139  if (descriptor.m_BiasEnabled)
140  {
141  if(!biases.has_value())
142  {
143  throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
144  }
145  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
146  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
147 
148  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
149  biasSketchInfoPtr = inputTensorInfos[2];
150  }
151 
152  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
153 
154  // Validate operator, check status and update reasonIfUnsupported
155  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
156  inputTensorInfos[0],
157  inputTensorInfos[1],
158  biasSketchInfoPtr,
159  depthwiseConv2dAttributes);
160 
161  const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
162  if (!supported)
163  {
164  throw BackendCapabilityException(
165  "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
166  }
167 
168  // Create the Op within the Sketch using the TensorInfos we have stored
169  arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
170  inputTensorInfos[0],
171  inputTensorInfos[1],
172  biasSketchInfoPtr,
173  depthwiseConv2dAttributes);
174 
175  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
176  GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
177 
178  // Store the TensorInfos within the blob as unique_ptrs to be used later
179  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
180  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
181 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaDepthwiseConvolution2dValidate()

arm_compute::Status GpuFsaDepthwiseConvolution2dValidate ( const TensorInfo &  input,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file GpuFsaDepthwiseConvolution2d.cpp.

30 {
31  // Create a new workload sketch, for validation purposes
32  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
33  auto workloadContext = GpuWorkloadContext(&compileCtx);
34  GpuWorkloadSketch sketch{ &workloadContext };
35 
36  // Build and create tensor infos using the sketch
37  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
38 
39  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40  //
41  // ACL format for weights for depthwise is:
42  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43  // - [1, C, H, W] for [N, C, H, W] input/output layout
44  //
45  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47  // so we do the permute here for the TensorInfo weights.
48  unsigned int aclDepthMultiplier;
49  TensorInfo weightsPermuted;
50  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
51  auto weightsShape = weightsPermuted.GetShape();
52  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
53 
54  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
55  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
56 
57  auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
58  auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
59 
60  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
61  arm_compute::TensorInfo aclBiasInfo;
62  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
63 
64  if (descriptor.m_BiasEnabled)
65  {
66  if(!biases.has_value())
67  {
68  throw InvalidArgumentException(
69  "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
70  }
71  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
72  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
73 
74  biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
75  }
76 
77  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
78 
79  // Validate operator, check status and update reasonIfUnsupported
80  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
81  inputInfo,
82  weightInfo,
83  biasSketchInfoPtr,
84  depthwiseConv2dAttributes);
85 
86  return aclStatus;
87 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaElementwiseBinaryCreateOp()

void GpuFsaElementwiseBinaryCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input0,
const TensorInfo &  input1,
const ElementwiseBinaryDescriptor &  descriptor 
)

Definition at line 63 of file GpuFsaElementwiseBinary.cpp.

67 {
68  GpuWorkloadSketch* sketch = blob->sketch.get();
69  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
70  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
71  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
72 
73  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
74  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
75 
76  aclInput0Info.set_are_values_constant(input0.IsConstant());
77  aclInput1Info.set_are_values_constant(input1.IsConstant());
78 
79  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
80  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
81 
82  // Validate operator, check status and update reasonIfUnsupported
84  arm_compute::Status aclStatus{};
85  switch (descriptor.m_Operation)
86  {
87  case BinaryOperation::Add:
88  {
89  aclStatus = GpuAdd::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
90  break;
91  }
92  case BinaryOperation::Mul:
93  {
94  aclStatus = GpuMul::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
95  break;
96  }
97  case BinaryOperation::Sub:
98  {
99  aclStatus = GpuSub::validate_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
100  break;
101  }
102  default:
103  throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
104  + GetBinaryOperationAsCString(descriptor.m_Operation));
105  }
106 
107  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
108  if (!supported)
109  {
110  throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary operation validation");
111  }
112 
113  arm_compute::ITensorInfo* elementwiseBinaryOutputInfo{};
114  switch (descriptor.m_Operation)
115  {
116  case BinaryOperation::Add:
117  {
118  elementwiseBinaryOutputInfo = GpuAdd::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
119  break;
120  }
121  case BinaryOperation::Mul:
122  {
123  elementwiseBinaryOutputInfo = GpuMul::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
124  break;
125  }
126  case BinaryOperation::Sub:
127  {
128  elementwiseBinaryOutputInfo = GpuSub::create_op(*sketch, inputTensorInfos[0], inputTensorInfos[1]);
129  break;
130  }
131  default:
132  throw InvalidArgumentException(std::string("Elementwise Binary operation not supported in GpuFsa: ")
133  + GetBinaryOperationAsCString(descriptor.m_Operation));
134  }
135 
136  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
137  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
138  GpuOutput::create_op(*sketch, elementwiseBinaryOutputInfo, outputTensorInfos[0]);
139 
140  // Store the TensorInfos within the blob as unique_ptrs to be used later
141  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
142  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
143 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaElementwiseBinaryValidate()

arm_compute::Status GpuFsaElementwiseBinaryValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const ElementwiseBinaryDescriptor &  descriptor 
)

Definition at line 24 of file GpuFsaElementwiseBinary.cpp.

27 {
28  // Create a new workload sketch, for validation purposes
29  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
30  auto workloadContext = GpuWorkloadContext(&compileCtx);
31  GpuWorkloadSketch sketch{ &workloadContext };
32 
33  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
34  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
35 
36  aclInput0Info.set_are_values_constant(input0.IsConstant());
37  aclInput1Info.set_are_values_constant(input1.IsConstant());
38 
39  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
40  arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
41 
42  switch (descriptor.m_Operation)
43  {
44  case BinaryOperation::Add:
45  {
46  return GpuAdd::validate_op(sketch, inputInfo0, inputInfo1);
47  }
48  case BinaryOperation::Mul:
49  {
50  return GpuMul::validate_op(sketch, inputInfo0, inputInfo1);
51  }
52  case BinaryOperation::Sub:
53  {
54  return GpuSub::validate_op(sketch, inputInfo0, inputInfo1);
55  }
56  default:
57  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
58  std::string("Elementwise Binary operation not supported in GpuFsa: ")
59  + GetBinaryOperationAsCString(descriptor.m_Operation));
60  }
61 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().
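
A brief sketch (only Add, Mul and Sub reach validation, as the switch above shows; the shapes are assumptions for the example):

    armnn::TensorInfo in0({1, 16}, armnn::DataType::Float32);
    armnn::TensorInfo in1({1, 16}, armnn::DataType::Float32);
    armnn::ElementwiseBinaryDescriptor desc(armnn::BinaryOperation::Add);
    arm_compute::Status status = armnn::GpuFsaElementwiseBinaryValidate(in0, in1, desc);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // Not supported on GpuFsa: assign this layer to another backend.
    }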

◆ GpuFsaPooling2dCreateOp()

void GpuFsaPooling2dCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 40 of file GpuFsaPooling2d.cpp.

43 {
44  GpuWorkloadSketch* sketch = blob->sketch.get();
45  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
46  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
47  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
48 
49  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
50  aclInputInfo.set_are_values_constant(input.IsConstant());
51 
52  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
53 
54  Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
55  GpuPool2dSettings pool2dSettings{};
56 
57  // Validate operator, check status and update reasonIfUnsupported
58  arm_compute::Status aclStatus = GpuPool2d::validate_op(*sketch,
59  inputTensorInfos[0],
60  pool2dAttributes,
61  pool2dSettings);
62 
63  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
64  if (!supported)
65  {
66  throw BackendCapabilityException("\"GpuFsa\" backend failed during pooling 2d validation");
67  }
68 
69  arm_compute::ITensorInfo* addOutputInfo = GpuPool2d::create_op(*sketch,
70  inputTensorInfos[0],
71  pool2dAttributes,
72  pool2dSettings);
73 
74  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
75  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
76  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
77 
78  // Store the TensorInfos within the blob as unique_ptrs to be used later
79  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
80  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
81 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaPooling2dValidate()

arm_compute::Status GpuFsaPooling2dValidate ( const TensorInfo &  input,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaPooling2d.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  Pool2dAttributes pool2dAttributes = CreatePool2dAttributes(descriptor);
35  GpuPool2dSettings pool2dSettings{};
36 
37  return GpuPool2d::validate_op(sketch, inputInfo, pool2dAttributes, pool2dSettings);
38 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaPreCompiledWorkloadValidate()

bool armnn::GpuFsaPreCompiledWorkloadValidate ( std::string *  reasonIfUnsupported)

◆ GpuFsaReshapeCreateOp()

void GpuFsaReshapeCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ReshapeDescriptor &  descriptor 
)

Definition at line 49 of file GpuFsaReshape.cpp.

50 {
51  GpuWorkloadSketch* sketch = blob->sketch.get();
52  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
53 
54  std::vector<arm_compute::ITensorInfo*> inputTensorInfos;
55  std::vector<arm_compute::ITensorInfo*> outputTensorInfos;
56 
57  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
58 
59  aclInputInfo.set_are_values_constant(input.IsConstant());
60 
61  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
62 
63  ReshapeAttributes attributes;
64  attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
65 
66  arm_compute::ITensorInfo* addOutputInfo = GpuReshape::create_op(*sketch, inputTensorInfos[0], attributes);
67 
68  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created
69  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
70  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
71 
72  // Store the tensorInfos within the blob as std::unique_ptr<> so they can be used later
73  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
74  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
75 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaReshapeValidate()

arm_compute::Status GpuFsaReshapeValidate ( const TensorInfo &  input,
const ReshapeDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaReshape.cpp.

23 {
24  auto compileContext = arm_compute::CLKernelLibrary::get().get_compile_context();
25  auto workloadContext = GpuWorkloadContext(&compileContext);
26 
27  GpuWorkloadSketch sketch(&workloadContext);
28 
29  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
30  aclInputInfo.set_are_values_constant(input.IsConstant());
31 
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  ReshapeAttributes attributes;
35  attributes.shape(BuildArmComputeTensorShape(descriptor.m_TargetShape));
36 
37  arm_compute::Status aclStatus = GpuReshape::validate_op(sketch, inputInfo, attributes);
38 
39 #ifndef NDEBUG
40  if (aclStatus.error_code() != arm_compute::ErrorCode::OK)
41  {
42  std::cout << "GpuFsaReshapeValidate failed: " << aclStatus.error_description() << std::endl;
43  }
44 #endif
45 
46  return aclStatus;
47 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaResizeCreateOp()

void GpuFsaResizeCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const ResizeDescriptor &  descriptor 
)

Definition at line 39 of file GpuFsaResize.cpp.

42 {
43  GpuWorkloadSketch* sketch = blob->sketch.get();
44  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
45  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
46  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
47 
48  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
49  aclInputInfo.set_are_values_constant(input.IsConstant());
50 
51  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
52 
53  ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
54 
55  // Validate operator, check status and update reasonIfUnsupported
56  arm_compute::Status aclStatus = GpuResize::validate_op(*sketch,
57  inputTensorInfos[0],
58  resizeAttributes);
59 
60  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
61  if (!supported)
62  {
63  throw BackendCapabilityException("\"GpuFsa\" backend failed during resize validation");
64  }
65 
66  arm_compute::ITensorInfo* addOutputInfo = GpuResize::create_op(*sketch,
67  inputTensorInfos[0],
68  resizeAttributes);
69 
70  // Temporary fix until fusing attempt is made for GpuFsa backend and Output layer workload is created.
71  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
72  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
73 
74  // Store the TensorInfos within the blob as unique_ptrs to be used later
75  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
76  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
77 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaResizeValidate()

arm_compute::Status GpuFsaResizeValidate ( const TensorInfo &  input,
const ResizeDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaResize.cpp.

24 {
25  // Create a new workload sketch, for validation purposes
26  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
27  auto workloadContext = GpuWorkloadContext(&compileCtx);
28  GpuWorkloadSketch sketch{ &workloadContext };
29 
30  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
31  aclInputInfo.set_are_values_constant(input.IsConstant());
32  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
33 
34  ResizeAttributes resizeAttributes = CreateResizeAttributes(descriptor);
35 
36  return GpuResize::validate_op(sketch, inputInfo, resizeAttributes);
37 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaSoftmaxCreateOp()

void GpuFsaSoftmaxCreateOp ( GpuFsaPreCompiledBlob *  blob,
const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 63 of file GpuFsaSoftmax.cpp.

67 {
68  GpuWorkloadSketch* sketch = blob->sketch.get();
69  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
70  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
71  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
72 
73  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
74  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
75  aclInputInfo.set_are_values_constant(input.IsConstant());
76  aclOutputInfo.set_are_values_constant(output.IsConstant());
77 
78  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
79  outputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclOutputInfo));
80 
81  // Set Softmax attributes using descriptor
82  SoftmaxAttributes softmaxAttributes{};
83  softmaxAttributes.beta(descriptor.m_Beta); // Only used for LogSoftmax else default
84  softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
85  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
86  softmaxAttributes.axis(aclAxis);
87 
88  // Validate operator, check status and update reasonIfUnsupported
89  arm_compute::Status aclStatus = GpuSoftmax::validate_op(*sketch,
90  inputTensorInfos[0],
91  outputTensorInfos[0],
92  softmaxAttributes);
93  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
94  if (!supported)
95  {
96  throw BackendCapabilityException("\"GpuFsa\" backend failed during softmax validation");
97  }
98 
99  GpuSoftmax::create_op(*sketch, inputTensorInfos[0], outputTensorInfos[0], softmaxAttributes);
100 
101  // Store the TensorInfos within the blob as unique_ptrs to be used later
102  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
103  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
104 }

References GpuFsaPreCompiledBlob::sketch, and GpuFsaPreCompiledBlob::workloadContext.

Referenced by GpuFsaBackend::OptimizeSubgraphView().

◆ GpuFsaSoftmaxValidate()

arm_compute::Status GpuFsaSoftmaxValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 22 of file GpuFsaSoftmax.cpp.

25 {
26  // Create a new workload sketch, for validation purposes
27  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
28  auto workloadContext = GpuWorkloadContext(&compileCtx);
29  GpuWorkloadSketch sketch{ &workloadContext };
30 
31  // Build and create tensor infos using the sketch
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
34  aclInputInfo.set_are_values_constant(input.IsConstant());
35  aclOutputInfo.set_are_values_constant(output.IsConstant());
36  arm_compute::ITensorInfo* inputInfo = workloadContext.create_tensor_info(aclInputInfo);
37  arm_compute::ITensorInfo* outputInfo = workloadContext.create_tensor_info(aclOutputInfo);
38 
39  // Set Softmax attributes using descriptor
40  SoftmaxAttributes softmaxAttributes{};
41  softmaxAttributes.beta(descriptor.m_Beta);
42  softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
43  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
44  softmaxAttributes.axis(aclAxis);
45 
46  // Validate operator, check status and update reasonIfUnsupported
47  arm_compute::Status aclStatus = GpuSoftmax::validate_op(sketch,
48  inputInfo,
49  outputInfo,
50  softmaxAttributes);
51 
52 #ifndef NDEBUG
53  const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
54  if (!validated)
55  {
56  std::cout << "GpuFsaSoftmaxValidate failed: " << aclStatus.error_description() << std::endl;
57  }
58 #endif
59 
60  return aclStatus;
61 }

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ GpuFsaTensorHandleFactoryId()

constexpr const char* armnn::GpuFsaTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file GpuFsaTensorHandleFactory.hpp.

14 { return "Arm/GpuFsa/TensorHandleFactory"; }

Referenced by GpuFsaTensorHandleFactory::GetIdStatic().

◆ HasCapability() [1/4]

bool HasCapability ( const BackendOptions::BackendOption &  backendOption,
const armnn::BackendId &  backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 80 of file BackendHelper.cpp.

81 {
82  return HasMatchingCapability(backendOption, backend);
83 }

References HasMatchingCapability().

◆ HasCapability() [2/4]

bool HasCapability ( const BackendOptions::BackendOption &  capability,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 75 of file BackendHelper.cpp.

76 {
77  return HasMatchingCapability(capability, capabilities);
78 }

References HasMatchingCapability().

◆ HasCapability() [3/4]

bool HasCapability ( const std::string &  name,
const armnn::BackendId &  backend 
)

Convenience function to check if a capability exists in a backend.

Definition at line 70 of file BackendHelper.cpp.

71 {
72  return GetCapability(name, backend).has_value();
73 }

References GetCapability().

◆ HasCapability() [4/4]

bool HasCapability ( const std::string &  name,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a capability exists in a BackendCapabilites struct.

Definition at line 65 of file BackendHelper.cpp.

66 {
67  return GetCapability(name, capabilities).has_value();
68 }

References GetCapability().

◆ HasMatchingCapability() [1/2]

bool HasMatchingCapability ( const BackendOptions::BackendOption &  backendOption,
const armnn::BackendId &  backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 117 of file BackendHelper.cpp.

118 {
119  auto const& backendRegistry = armnn::BackendRegistryInstance();
120  if (backendRegistry.IsBackendRegistered(backend))
121  {
122  auto factoryFunc = backendRegistry.GetFactory(backend);
123  auto backendObject = factoryFunc();
124  auto capabilities = backendObject->GetCapabilities();
125  return HasMatchingCapability(backendOption, capabilities);
126  }
127  return false;
128 }

References BackendRegistryInstance(), and HasMatchingCapability().

◆ HasMatchingCapability() [2/2]

bool HasMatchingCapability ( const BackendOptions::BackendOption &  capability,
const BackendCapabilities &  capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 85 of file BackendHelper.cpp.

86 {
87  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
88  {
89  const auto& backendCapability = capabilities.GetOption(i);
90  if (capability.GetName() == backendCapability.GetName())
91  {
92  if (capability.GetValue().IsBool() && backendCapability.GetValue().IsBool())
93  {
94  return capability.GetValue().AsBool() == backendCapability.GetValue().AsBool();
95  }
96  else if (capability.GetValue().IsFloat() && backendCapability.GetValue().IsFloat())
97  {
98  return capability.GetValue().AsFloat() == backendCapability.GetValue().AsFloat();
99  }
100  else if (capability.GetValue().IsInt() && backendCapability.GetValue().IsInt())
101  {
102  return capability.GetValue().AsInt() == backendCapability.GetValue().AsInt();
103  }
104  else if (capability.GetValue().IsString() && backendCapability.GetValue().IsString())
105  {
106  return capability.GetValue().AsString() == backendCapability.GetValue().AsString();
107  }
108  else if (capability.GetValue().IsUnsignedInt() && backendCapability.GetValue().IsUnsignedInt())
109  {
110  return capability.GetValue().AsUnsignedInt() == backendCapability.GetValue().AsUnsignedInt();
111  }
112  }
113  }
114  return false;
115 }

References BackendOptions::Var::AsBool(), BackendOptions::Var::AsFloat(), BackendOptions::Var::AsInt(), BackendOptions::Var::AsString(), BackendOptions::Var::AsUnsignedInt(), BackendOptions::BackendOption::GetName(), BackendOptions::GetOption(), BackendOptions::GetOptionCount(), BackendOptions::BackendOption::GetValue(), BackendOptions::Var::IsBool(), BackendOptions::Var::IsFloat(), BackendOptions::Var::IsInt(), BackendOptions::Var::IsString(), and BackendOptions::Var::IsUnsignedInt().

Referenced by ArmnnDevice::ArmnnDevice(), CheckFp16Support(), HasCapability(), HasMatchingCapability(), LoadedNetwork::ImportInputs(), LoadedNetwork::ImportOutputs(), and RuntimeImpl::RuntimeImpl().
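
As a usage sketch, a capability check typically pairs a BackendOptions::BackendOption with a backend id. The capability name "NonConstWeights" is an example only; the set of reported capabilities depends on the backend.

    // Does the CpuAcc backend report NonConstWeights == true?
    armnn::BackendOptions::BackendOption option{ "NonConstWeights", true };
    bool matches = armnn::HasMatchingCapability(option, armnn::BackendId("CpuAcc"));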

◆ IgnoreUnused()

void armnn::IgnoreUnused ( Ts &&  ...)
inline

Definition at line 14 of file IgnoreUnused.hpp.

14 {}

Referenced by ClBackendDefaultAllocator::allocate(), DefaultAllocator::allocate(), GpuFsaBackendDefaultAllocator::allocate(), ArgMinMax(), CalculateSlotOptionForOutput(), ITensorHandle::CanBeImported(), ClTensorHandle::CanBeImported(), ClContextControl::ClContextControl(), ClConvolution3dWorkload::ClConvolution3dWorkload(), SpaceToBatchNdLayer::Clone(), SpaceToDepthLayer::Clone(), DynamicBackendUtils::CloseHandle(), ConstTensorPin::ConstTensorPin(), IBackendInternal::CreateExecutionData(), RefTensorHandleFactory::CreateSubTensorHandle(), TosaRefTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicWorkloadFactory::CreateSubTensorHandle(), TosaRefWorkloadFactory::CreateSubTensorHandle(), RefWorkloadFactory::CreateSubTensorHandle(), SampleDynamicTensorHandleFactory::CreateTensorHandle(), RefTensorHandleFactory::CreateTensorHandle(), TosaRefTensorHandleFactory::CreateTensorHandle(), MockTensorHandleFactory::CreateTensorHandle(), ClWorkloadFactory::CreateTensorHandle(), TosaRefWorkloadFactory::CreateTensorHandle(), RefWorkloadFactory::CreateTensorHandle(), ITensorHandleFactory::CreateTensorHandle(), OutputLayer::CreateTensorHandles(), OutputLayer::CreateWorkload(), MergeLayer::CreateWorkload(), UnmapLayer::CreateWorkload(), InputLayer::CreateWorkload(), MapLayer::CreateWorkload(), MemCopyLayer::CreateWorkload(), MemImportLayer::CreateWorkload(), StandInLayer::CreateWorkload(), IBackendInternal::CreateWorkloadFactory(), ITensorHandle::DecorateTensorHandle(), Dequantize(), SelectiveQuantizer< T, false >::Dequantize(), SelectiveQuantizer< armnn::Half, false >::Dequantize(), SelectiveQuantizer< armnn::BFloat16, false >::Dequantize(), DetectionPostProcess(), ProfilerImpl::EndEvent(), SerializerStrategy::ExecuteStrategy(), UnmapLayer::ExecuteStrategy(), MapLayer::ExecuteStrategy(), MemImportLayer::ExecuteStrategy(), FakeQuantizationLayer::ExecuteStrategy(), StrategyBase< DefaultStrategy >::ExecuteStrategy(), ExecutionFrame::ExecuteWorkloads(), FalseFunc(), FalseFuncF16(), FalseFuncF32(), FalseFuncI32(), FalseFuncU8(), FalseInputFuncF16(), FalseInputFuncF32(), FalseOutputFuncF16(), FalseOutputFuncF32(), Gather(), ClImportTensorHandleFactory::GetCapabilities(), NeonTensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetCapabilities(), DynamicBackendUtils::GetEntryPoint(), DefaultAllocator::GetMemoryRegionAtOffset(), ClBackendDefaultAllocator::GetMemoryRegionAtOffset(), GpuFsaBackendDefaultAllocator::GetMemoryRegionAtOffset(), ICustomAllocator::GetMemoryRegionAtOffset(), IDeserializer::DeserializerImpl::GetNetworkInputBindingInfo(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), IDeserializer::DeserializerImpl::GetNormalizationDescriptor(), IDeserializer::DeserializerImpl::GetPooling2dDescriptor(), IDeserializer::DeserializerImpl::GetPooling3dDescriptor(), DynamicBackendUtils::GetSharedObjects(), ITensorHandle::Import(), ClTensorHandle::Import(), ShapeLayer::InferOutputShapes(), SliceLayer::InferOutputShapes(), StackLayer::InferOutputShapes(), StandInLayer::InferOutputShapes(), ReshapeLayer::InferOutputShapes(), SplitterLayer::InferOutputShapes(), NeonLayerSupport::IsActivationSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchMatMulSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsBroadcastToSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), 
RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), NeonLayerSupport::IsFillSupported(), ClLayerSupport::IsFillSupported(), RefLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsGatherSupported(), IsGpuFsaBackendSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), TosaRefLayerSupport::IsLayerSupported(), GpuFsaLayerSupport::IsLayerSupported(), ILayerSupport::IsLayerSupported(), ClLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsQLstmSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), ClLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsReshapeSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsTileSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), RefLayerSupport::IsUnidirectionalSequenceLstmSupported(), Layer::Layer(), LogSoftmax(), ClImportTensorHandle::Map(), ClBackend::ClBackendCustomAllocatorMemoryRegion::map(), GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::map(), ClImportSubTensorHandle::Map(), NeonConvertFp16ToFp32WorkloadValidate(), NeonConvertFp32ToFp16WorkloadValidate(), NeonConvolution3dWorkload::NeonConvolution3dWorkload(), DynamicBackendUtils::OpenHandle(), SelectiveQuantizer< T, false >::Quantize(), SelectiveQuantizer< armnn::Half, false >::Quantize(), SelectiveQuantizer< armnn::BFloat16, false >::Quantize(), BaseWorkload< SplitterQueueDescriptor >::ReplaceInputTensorHandle(), BaseWorkload< SplitterQueueDescriptor >::ReplaceOutputTensorHandle(), OptimizeInverseConversionsImpl::Run(), OptimizeInversePermutesImpl< PermuteType >::Run(), SquashEqualSiblingsImpl< Comparable >::Run(), FuseBatchNorm< ConvLayer, ArmnnType, T >::Run(), ConvertConstants< Converter, Predicate >::Run(), ClImportTensorHandle::SetMemoryGroup(), ClImportSubTensorHandle::SetMemoryGroup(), OpenClTimer::Start(), MemoryManager::StoreMemToAllocate(), TrueFunc(), ClBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), IBackendInternal::UpdateExecutionData(), ClBackend::UseCustomMemoryAllocator(), IBackendInternal::UseCustomMemoryAllocator(), WorkingMemHandle::WorkingMemHandle(), Graph::LayerInGraph< InputLayer >::~LayerInGraph(), and Graph::LayerInGraph< OutputLayer >::~LayerInGraph().

◆ IndexToCoordinates()

std::vector<uint32_t> armnn::IndexToCoordinates ( std::vector< uint32_t > &  shape,
uint32_t  index 
)

Definition at line 16 of file Tile.cpp.

17 {
18  std::vector<uint32_t> coordinates;
19  // Iterating through dimensions starting from the last dimension to the first
20  for (std::size_t i = shape.size() - 1; i < shape.size(); --i)
21  {
22  // Coordinate is found by getting the index and modulus it by the current dimension size
23  // shape of dimension = dimension size
24  coordinates.insert(coordinates.begin(), index % shape[i]);
25  // Pass the index to next iteration making index = index / size of the current dimension
26  index = index/shape[i];
27  }
28  return coordinates;
29 }

Referenced by Tile().
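
A worked example of the mapping above (row-major, last dimension varying fastest): flat index 7 in shape {2, 3, 4} decomposes as 7 = 0*(3*4) + 1*4 + 3, giving coordinates {0, 1, 3}.

    // Illustrative call, assuming the function is visible to the caller.
    std::vector<uint32_t> shape  = { 2, 3, 4 };
    std::vector<uint32_t> coords = armnn::IndexToCoordinates(shape, 7); // {0, 1, 3}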

◆ InitializeArmComputeClTensorData()

void InitializeArmComputeClTensorData ( arm_compute::CLTensor &  clTensor,
const ConstTensorHandle *  handle 
)
inline

Definition at line 124 of file ClWorkloadUtils.hpp.

126 {
127  ARMNN_ASSERT(handle);
128 
129  armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
130  switch(handle->GetTensorInfo().GetDataType())
131  {
132  case DataType::Float16:
133  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
134  break;
135  case DataType::Float32:
136  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
137  break;
138  case DataType::QAsymmU8:
139  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
140  break;
141  case DataType::QAsymmS8:
142  case DataType::QSymmS8:
143  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
144  break;
145  case DataType::QSymmS16:
146  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
147  break;
148  case DataType::Signed32:
149  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
150  break;
151  case DataType::BFloat16:
152  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::BFloat16>());
153  break;
154  default:
155  // Throw exception; assertion not called in release build.
156  throw Exception("Unexpected tensor type during InitializeArmComputeClTensorData().");
157  }
158 };

References ARMNN_ASSERT.

◆ InitializeArmComputeTensorData() [1/2]

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
const ConstTensorHandle *  handle 
)
inline

Definition at line 104 of file NeonWorkloadUtils.hpp.

106 {
107  ARMNN_ASSERT(handle);
108 
109  switch(handle->GetTensorInfo().GetDataType())
110  {
111  case DataType::Float16:
112  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::Half>());
113  break;
114  case DataType::Float32:
115  CopyArmComputeTensorData(tensor, handle->GetConstTensor<float>());
116  break;
117  case DataType::QAsymmU8:
118  CopyArmComputeTensorData(tensor, handle->GetConstTensor<uint8_t>());
119  break;
120  case DataType::QSymmS8:
121  case DataType::QAsymmS8:
122  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int8_t>());
123  break;
124  case DataType::Signed32:
125  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int32_t>());
126  break;
127  case DataType::QSymmS16:
128  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int16_t>());
129  break;
130  case DataType::BFloat16:
131  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::BFloat16>());
132  break;
133  default:
134  // Throw exception; assertion not called in release build.
135  throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
136  }
137 };

References ARMNN_ASSERT, BFloat16, CopyArmComputeTensorData(), Float16, Float32, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), ConstTensorHandle::GetTensorInfo(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ InitializeArmComputeTensorData() [2/2]

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
TensorInfo  tensorInfo,
const ITensorHandle *  handle 
)
inline

Definition at line 68 of file NeonWorkloadUtils.hpp.

71 {
72  ARMNN_ASSERT(handle);
73 
74  switch(tensorInfo.GetDataType())
75  {
76  case DataType::Float16:
77  CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::Half*>(handle->Map()));
78  break;
79  case DataType::Float32:
80  CopyArmComputeTensorData(tensor, reinterpret_cast<const float*>(handle->Map()));
81  break;
82  case DataType::QAsymmU8:
83  CopyArmComputeTensorData(tensor, reinterpret_cast<const uint8_t*>(handle->Map()));
84  break;
85  case DataType::QSymmS8:
86  case DataType::QAsymmS8:
87  CopyArmComputeTensorData(tensor, reinterpret_cast<const int8_t*>(handle->Map()));
88  break;
89  case DataType::Signed32:
90  CopyArmComputeTensorData(tensor, reinterpret_cast<const int32_t*>(handle->Map()));
91  break;
92  case DataType::QSymmS16:
93  CopyArmComputeTensorData(tensor, reinterpret_cast<const int16_t*>(handle->Map()));
94  break;
95  case DataType::BFloat16:
96  CopyArmComputeTensorData(tensor, reinterpret_cast<const armnn::BFloat16*>(handle->Map()));
97  break;
98  default:
99  // Throw exception; assertion not called in release build.
100  throw Exception("Unexpected tensor type during InitializeArmComputeTensorData().");
101  }
102 };

References ARMNN_ASSERT, BFloat16, CopyArmComputeTensorData(), Float16, Float32, TensorInfo::GetDataType(), ITensorHandle::Map(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

Referenced by NeonFullyConnectedWorkload::Execute(), and NeonConvolution2dWorkload::Execute().

◆ InsertConvertFp16ToFp32LayersBefore()

std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore ( Graph &  graph,
Layer &  layer,
bool  expectCorrectInputType 
)

Definition at line 40 of file NetworkUtils.cpp.

43 {
44  std::vector<ConvertFp16ToFp32Layer*> convertLayers;
45  convertLayers.reserve(layer.GetNumInputSlots());
46 
47  // Insert a ConvertFp16ToFp32Layer before each input slot
48  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
49  {
50  bool allowInsert = true;
51  if (expectCorrectInputType)
52  {
53  // Only insert ConvertFp16ToFp32Layer before FP16 input slots
54  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
55  allowInsert =
56  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
57  }
58 
59  if (allowInsert)
60  {
61  const std::string name =
62  std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
63  layer.GetName();
64  ConvertFp16ToFp32Layer* convertLayer =
65  graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
66 
67  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
68  convertInfo.SetDataType(DataType::Float32);
69 
70  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
71 
72  convertLayers.emplace_back(convertLayer);
73  }
74  }
75 
76  return convertLayers;
77 }

References Layer::BeginInputSlots(), Layer::EndInputSlots(), Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().

◆ InsertConvertFp32ToFp16LayersAfter()

std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter ( Graph &  graph,
Layer &  layer 
)

Definition at line 79 of file NetworkUtils.cpp.

80 {
81  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
82 
83  std::vector<ConvertFp32ToFp16Layer*> convertLayers;
84  convertLayers.reserve(numOutputSlots);
85 
86  // Update FP16 output slots to FP32 on current layer
87  ChangeOutputFp16ToFp32(layer);
88 
89  // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
90  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
91  {
92  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
93  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
94  {
95  const std::string name =
96  std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
97  ConvertFp32ToFp16Layer* convertLayer =
98  graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
99 
100  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
101  convertInfo.SetDataType(DataType::Float16);
102 
103  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
104 
105  convertLayers.emplace_back(convertLayer);
106  }
107  }
108 
109  return convertLayers;
110 }

References Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), and ConvertFp32NetworkToFp16Impl::Run().
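
Taken together, the two helpers are typically used by an optimisation pass to keep a single FP32-only layer working inside an otherwise FP16 network. A hedged sketch, assuming graph and layer refer to an existing Graph and Layer:

    // Convert the layer's inputs to FP32 and its outputs back to FP16.
    std::vector<armnn::ConvertFp16ToFp32Layer*> toFp32 =
        armnn::InsertConvertFp16ToFp32LayersBefore(graph, layer, true);
    std::vector<armnn::ConvertFp32ToFp16Layer*> toFp16 =
        armnn::InsertConvertFp32ToFp16LayersAfter(graph, layer);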

◆ InsertDebugLayerAfter()

std::vector< DebugLayer * > InsertDebugLayerAfter ( Graph &  graph,
Layer &  layer,
bool  toFile 
)

Definition at line 112 of file NetworkUtils.cpp.

113 {
114  std::vector<DebugLayer*> debugLayers;
115  debugLayers.reserve(layer.GetNumOutputSlots());
116 
117  // Connect a DebugLayer to each output slot of the layer
118  uint32_t outputSlotIdx = 0;
119  for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
120  {
121  const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
122  std::to_string(outputSlotIdx);
123 
124  DebugLayer* debugLayer =
125  graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);
126 
127  // Sets output tensor info for the debug layer.
128  ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
129  TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
130 
131  debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
132 
133  // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
134  debugLayer->SetBackendId(Compute::CpuRef);
135 
136  debugLayers.emplace_back(debugLayer);
137 
138  ++outputSlotIdx;
139  }
140 
141  return debugLayers;
142 }

References ARMNN_ASSERT, Layer::BeginOutputSlots(), CpuRef, Layer::EndOutputSlots(), InputSlot::GetConnectedOutputSlot(), Layer::GetInputSlot(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AddDebugImpl::Run(), and AddDebugToFileImpl::Run().

◆ InstanceNorm()

void InstanceNorm ( const InstanceNormalizationQueueDescriptor &  data,
const TensorInfo &  inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file InstanceNorm.cpp.

22 {
23  const TensorShape inputShape = inputInfo.GetShape();
24 
25  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
26 
27  unsigned int inputBatches = inputShape[0];
28  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
29  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
30  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
31 
32  float beta = data.m_Parameters.m_Beta;
33  float eps = data.m_Parameters.m_Eps;
34  float gamma = data.m_Parameters.m_Gamma;
35 
36  for (unsigned int n = 0; n < inputBatches; ++n)
37  {
38  for (unsigned int c = 0; c < inputChannels; ++c)
39  {
40  float mean = 0, var = 0;
41 
42  //Calculate Mean
43  for (unsigned int h = 0; h < inputHeight; h++)
44  {
45  for (unsigned int w = 0; w < inputWidth; w++)
46  {
47  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
48 
49  inputDecoder[index];
50  float value = inputDecoder.Get();
51  mean += value;
52  }
53  }
54  mean /= static_cast<float>(inputHeight * inputWidth);
55 
56  //Calculate Variance
57  for (unsigned int h = 0; h < inputHeight; h++)
58  {
59  for (unsigned int w = 0; w < inputWidth; w++)
60  {
61  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
62 
63  inputDecoder[index];
64  float value = inputDecoder.Get();
65  var += (value - mean) * (value - mean);
66  }
67  }
68  var /= static_cast<float>(inputHeight * inputWidth);
69 
70  // Apply Instance Normalisation
71  for (unsigned int h = 0; h < inputHeight; ++h)
72  {
73  for (unsigned int w = 0; w < inputWidth; ++w)
74  {
75  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
76  inputDecoder[index];
77  outputEncoder[index];
78  outputEncoder.Set((inputDecoder.Get() - mean) * gamma / std::sqrt ( var + eps) + beta);
79  }
80 
81  }
82  }
83  }
84 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), InstanceNormalizationDescriptor::m_Beta, InstanceNormalizationDescriptor::m_DataLayout, InstanceNormalizationDescriptor::m_Eps, InstanceNormalizationDescriptor::m_Gamma, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().
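
In equation form, the loops above compute, for each batch n and channel c over the H x W spatial positions:

    \mu_{n,c} = \frac{1}{HW}\sum_{h,w} x_{n,c,h,w}, \qquad
    \sigma^2_{n,c} = \frac{1}{HW}\sum_{h,w} \bigl(x_{n,c,h,w} - \mu_{n,c}\bigr)^2, \qquad
    y_{n,c,h,w} = \gamma \, \frac{x_{n,c,h,w} - \mu_{n,c}}{\sqrt{\sigma^2_{n,c} + \epsilon}} + \beta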

◆ IntersectionOverUnion()

float IntersectionOverUnion ( const float *  boxI,
const float *  boxJ 
)

Definition at line 31 of file DetectionPostProcess.cpp.

32 {
33  // Box-corner format: ymin, xmin, ymax, xmax.
34  const int yMin = 0;
35  const int xMin = 1;
36  const int yMax = 2;
37  const int xMax = 3;
38  float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
39  float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
40  float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
41  float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
42  float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
43  float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
44  float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
45  std::max(xMaxIntersection - xMinIntersection, 0.0f);
46  float areaUnion = areaI + areaJ - areaIntersection;
47  return areaIntersection / areaUnion;
48 }

Referenced by NonMaxSuppression().
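
A worked example with illustrative box corners (ymin, xmin, ymax, xmax): two 2x2 boxes that overlap in a 1x1 region give IoU = 1 / (4 + 4 - 1) ≈ 0.143.

    const float boxI[4] = { 0.0f, 0.0f, 2.0f, 2.0f }; // area 4
    const float boxJ[4] = { 1.0f, 1.0f, 3.0f, 3.0f }; // area 4
    float iou = armnn::IntersectionOverUnion(boxI, boxJ); // 1 / 7 ≈ 0.1428f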

◆ IsBFloat16()

bool armnn::IsBFloat16 ( const WorkloadInfo &  info)

Definition at line 56 of file RefWorkloadFactory.cpp.

57 {
58  return IsDataType<DataType::BFloat16>(info);
59 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsDataType()

bool IsDataType ( const WorkloadInfo &  info)

Definition at line 32 of file GpuFsaWorkloadFactory.cpp.

33 {
34  auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
35  auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
36  if (it != std::end(info.m_InputTensorInfos))
37  {
38  return true;
39  }
40  it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
41  if (it != std::end(info.m_OutputTensorInfos))
42  {
43  return true;
44  }
45  return false;
46 }

References info.
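
IsDataType is a function template parameterised on DataType; the IsBFloat16(), IsFloat16() and IsQ... helpers documented nearby are thin wrappers around specific instantiations. A sketch, assuming workloadInfo is a WorkloadInfo in scope within the factory source:

    // Equivalent to the IsFloat16() wrapper.
    bool hasFp16 = IsDataType<DataType::Float16>(workloadInfo);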

◆ IsFloat16()

bool armnn::IsFloat16 ( const WorkloadInfo &  info)

Definition at line 60 of file RefWorkloadFactory.cpp.

61 {
62  return IsDataType<DataType::Float16>(info);
63 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsGpuFsaBackendSupported()

bool armnn::IsGpuFsaBackendSupported ( Optional< std::string & >  reasonIfUnsupported,
Args...  args 
)

Definition at line 31 of file GpuFsaLayerSupport.cpp.

32 {
33  IgnoreUnused(reasonIfUnsupported, (args)...);
34 #if defined(ARMCOMPUTEGPUFSA_ENABLED)
35  return true;
36 #else
37  if (reasonIfUnsupported)
38  {
39  reasonIfUnsupported.value() = "The armnn library has been built without CL support";
40  }
41  return false;
42 #endif
43 }

References IgnoreUnused(), and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by GpuFsaLayerSupport::IsLayerSupported().

◆ IsLayerOptimizable() [1/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer &  layer)

Definition at line 99 of file MockBackend.cpp.

100 {
101  return IsLayerOptimizable(&layer);
102 }

◆ IsLayerOptimizable() [2/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer *  layer)

Definition at line 88 of file MockBackend.cpp.

89 {
90  ARMNN_ASSERT(layer != nullptr);
91 
92  // A Layer is not optimizable if its name contains "unoptimizable"
93  const std::string layerName(layer->GetName());
94  bool optimizable = layerName.find("unoptimizable") == std::string::npos;
95 
96  return optimizable;
97 }

References ARMNN_ASSERT, and Layer::GetName().

◆ IsLayerSequence()

bool armnn::IsLayerSequence ( Layer &  currentLayer,
TYPE  first,
TYPE  second,
TYPE  third,
Layer *  layerList[4],
bool  handleValidActivates,
const std::vector< ActivationFunction > &  validActivates 
)

Definition at line 375 of file SubgraphUtils.hpp.

382 {
383  auto PreviousLayer = [](Layer& layer)
384  {
385  return &layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
386  };
387 
388  auto NextLayer = [](Layer& layer)
389  {
390  return &layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer();
391  };
392 
393  auto LayerIncomingConnectionDataType = [](Layer& layer)
394  {
395  return layer.GetInputSlot(0).GetTensorInfo().GetDataType();
396  };
397 
398  bool result = false;
399 
400  // Match in reverse so there is only 1 connection to check
401  if (IsSequenceLayerType(currentLayer, third))
402  {
403  // Save DataType of third layer
404  DataType dataType = LayerIncomingConnectionDataType(currentLayer);
405 
406  // Save third layer
407  layerList[2] = &currentLayer;
408 
409  // Check the layers that precede this one for the requested grouping
410  Layer *prevLayer = PreviousLayer(currentLayer);
411  if (prevLayer && IsSequenceLayerType(*prevLayer, second))
412  {
413  bool dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
414  if (! dataTypesMatch)
415  {
416  return result;
417  }
418 
419  layerList[1] = prevLayer;
420  prevLayer = PreviousLayer(*prevLayer);
421  if (prevLayer && IsSequenceLayerType(*prevLayer, first))
422  {
423  dataTypesMatch = (dataType == LayerIncomingConnectionDataType(*prevLayer));
424  if (! dataTypesMatch)
425  {
426  return result;
427  }
428 
429  layerList[0] = prevLayer;
430 
431  // Detected the first 3 layers if we get to this point so now
432  // check to see if we have a valid activation. If there is no activation
433  // then the sequence still matches.
434  if (handleValidActivates)
435  {
436  Layer *nextLayer = NextLayer(currentLayer);
437  if (nextLayer)
438  {
440  {
441  // This layer is an activation, so it must be a valid type for the sequence
442  ActivationFunction activationFunction =
443  PolymorphicDowncast<ActivationLayer*>(nextLayer)->GetParameters().m_Function;
444  long count = std::count(validActivates.cbegin(),
445  validActivates.cend(),
446  activationFunction);
447  if (count > 0)
448  {
449  layerList[3] = nextLayer;
450  result = true;
451  }
452  }
453  else
454  {
455  // Next layer is not an activation so sequence still matches
456  result = true;
457  }
458  }
459  }
460  else
461  {
462  result = true;
463  }
464  }
465  }
466  }
467 
468  return result;
469 }

◆ IsLayerSupported() [1/2]

bool armnn::IsLayerSupported ( const armnn::Layer &  layer)

Definition at line 83 of file MockBackend.cpp.

84 {
85  return IsLayerSupported(&layer);
86 }

◆ IsLayerSupported() [2/2]

bool armnn::IsLayerSupported ( const armnn::Layer *  layer)

Definition at line 62 of file MockBackend.cpp.

63 {
64  ARMNN_ASSERT(layer != nullptr);
65 
66  armnn::LayerType layerType = layer->GetType();
67  switch (layerType)
68  {
75  // Layer supported
76  return true;
77  default:
78  // Layer unsupported
79  return false;
80  }
81 }

References Addition, ARMNN_ASSERT, Constant, Convolution2d, ElementwiseBinary, Layer::GetType(), Input, and Output.

Referenced by SampleDynamicWorkloadFactory::IsLayerSupported().

◆ IsLayerTypeSupported()

bool armnn::IsLayerTypeSupported ( const LayerType type,
const std::vector< TensorInfo > &  infos,
const BaseDescriptor &  descriptor,
const Optional< LstmInputParamsInfo > &  lstmParamsInfo,
const Optional< QuantizedLstmInputParamsInfo > &  quantizedLstmParamsInfo,
Optional< std::string & >  reasonIfUnsupported,
const NeonLayerSupport &  support 
)

Definition at line 172 of file NeonLayerSupport.cpp.

179 {
180  switch (type)
181  {
182  case LayerType::Activation:
183  return support.IsActivationSupported(infos[0],
184  infos[1],
185  *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)),
186  reasonIfUnsupported);
187  case LayerType::Addition:
188  return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
189  case LayerType::ArgMinMax:
190  return support.IsArgMinMaxSupported(infos[0],
191  infos[1],
192  *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
193  reasonIfUnsupported);
194  case LayerType::BatchMatMul:
195  return support.IsBatchMatMulSupported(infos[0],
196  infos[1],
197  infos[2],
198  *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
199  reasonIfUnsupported);
200  case LayerType::BatchNormalization:
201  return support.IsBatchNormalizationSupported(infos[0],
202  infos[1],
203  infos[2],
204  infos[3],
205  infos[4],
206  infos[5],
207  *(PolymorphicDowncast<const
208  BatchNormalizationDescriptor*>(&descriptor)),
209  reasonIfUnsupported);
210  case LayerType::BatchToSpaceNd:
211  return support.IsBatchToSpaceNdSupported(infos[0],
212  infos[1],
213  *(PolymorphicDowncast<const
214  BatchToSpaceNdDescriptor*>(&descriptor)),
215  reasonIfUnsupported);
216  case LayerType::Cast:
217  return support.IsCastSupported(infos[0], infos[1], reasonIfUnsupported);
218  case LayerType::ChannelShuffle:
219  return support.IsChannelShuffleSupported(infos[0],
220  infos[1],
221  *(PolymorphicDowncast<const
222  ChannelShuffleDescriptor*>(&descriptor)),
223  reasonIfUnsupported);
224  case LayerType::Comparison:
225  return support.IsComparisonSupported(infos[0],
226  infos[1],
227  infos[2],
228  *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)),
229  reasonIfUnsupported);
230  case LayerType::Concat:
231  {
232  std::vector<const TensorInfo*> inputInfos;
233  for (uint32_t i = 0; i < (infos.size() - 1); i++)
234  {
235  inputInfos.push_back(&infos[i]);
236  }
237  return support.IsConcatSupported(inputInfos,
238  infos[infos.size() - 1],
239  *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)),
240  reasonIfUnsupported);
241  }
242  case LayerType::Constant:
243  return support.IsConstantSupported(infos[0], reasonIfUnsupported);
244  case LayerType::ConvertFp16ToFp32:
245  return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported);
246  case LayerType::ConvertFp32ToFp16:
247  return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported);
248  case LayerType::Convolution2d:
249  {
250  if (infos.size() != 4)
251  {
252  throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
253  "TensorInfos should be of format: {input, output, weights, biases}.");
254  }
255 
256  auto desc = *(PolymorphicDowncast<const Convolution2dDescriptor*>(&descriptor));
257  if (infos[3] == TensorInfo())
258  {
259  return support.IsConvolution2dSupported(infos[0],
260  infos[1],
261  desc,
262  infos[2],
263  EmptyOptional(),
264  reasonIfUnsupported);
265  }
266  else
267  {
268  return support.IsConvolution2dSupported(infos[0],
269  infos[1],
270  desc,
271  infos[2],
272  infos[3],
273  reasonIfUnsupported);
274  }
275  }
276  case LayerType::Convolution3d:
277  {
278  if (infos.size() != 4)
279  {
280  throw InvalidArgumentException("Invalid number of Convolution3d TensorInfos. "
281  "TensorInfos should be of format: {input, output, weights, biases}.");
282  }
283 
284  auto desc = *(PolymorphicDowncast<const Convolution3dDescriptor*>(&descriptor));
285  if (infos[3] == TensorInfo())
286  {
287  return support.IsConvolution3dSupported(infos[0],
288  infos[1],
289  desc,
290  infos[2],
291  EmptyOptional(),
292  reasonIfUnsupported);
293  }
294  else
295  {
296  return support.IsConvolution3dSupported(infos[0],
297  infos[1],
298  desc,
299  infos[2],
300  infos[3],
301  reasonIfUnsupported);
302  }
303  }
304  case LayerType::DepthToSpace:
305  return support.IsDepthToSpaceSupported(infos[0],
306  infos[1],
307  *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)),
308  reasonIfUnsupported);
309  case LayerType::DepthwiseConvolution2d:
310  {
311  if (infos.size() != 4)
312  {
313  throw InvalidArgumentException("Invalid number of DepthwiseConvolution2d TensorInfos. "
314  "TensorInfos should be of format: {input, output, weights, biases}.");
315  }
316 
317  auto desc = *(PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor));
318  if (infos[3] == TensorInfo())
319  {
320  return support.IsDepthwiseConvolutionSupported(infos[0],
321  infos[1],
322  desc,
323  infos[2],
324  EmptyOptional(),
325  reasonIfUnsupported);
326  }
327  else
328  {
329  return support.IsDepthwiseConvolutionSupported(infos[0],
330  infos[1],
331  desc,
332  infos[2],
333  infos[3],
334  reasonIfUnsupported);
335  }
336  }
337  case LayerType::Dequantize:
338  return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
339  case LayerType::DetectionPostProcess:
340  {
341  auto desc = *(PolymorphicDowncast<const DetectionPostProcessDescriptor*>(&descriptor));
342  return support.IsDetectionPostProcessSupported(infos[0],
343  infos[1],
344  infos[2],
345  infos[3],
346  infos[4],
347  infos[5],
348  infos[6],
349  desc,
350  reasonIfUnsupported);
351  }
352  case LayerType::Division:
353  return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
354  case LayerType::ElementwiseBinary:
355  {
356  auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
357 
358  switch (desc.m_Operation)
359  {
360  case BinaryOperation::Add:
361  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
362  reasonIfUnsupported,
363  infos[0],
364  infos[1],
365  infos[2],
366  nullptr);
367  case BinaryOperation::Div:
368  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate,
369  reasonIfUnsupported,
370  infos[0],
371  infos[1],
372  infos[2],
373  nullptr);
374  case BinaryOperation::Maximum:
375  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
376  reasonIfUnsupported,
377  infos[0],
378  infos[1],
379  infos[2]);
380  case BinaryOperation::Minimum:
381  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
382  reasonIfUnsupported,
383  infos[0],
384  infos[1],
385  infos[2]);
386  case BinaryOperation::Mul:
387  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
388  reasonIfUnsupported,
389  infos[0],
390  infos[1],
391  infos[2],
392  nullptr);
393  case BinaryOperation::Power:
394  case BinaryOperation::SqDiff:
395  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonElementwiseBinaryWorkloadValidate,
396  reasonIfUnsupported,
397  infos[0],
398  infos[1],
399  infos[2],
400  desc,
401  nullptr);
402  case BinaryOperation::Sub:
403  return FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
404  reasonIfUnsupported,
405  infos[0],
406  infos[1],
407  infos[2],
408  nullptr);
409  default:
410  return false;
411  }
412  }
413  case LayerType::ElementwiseUnary:
414  return support.IsElementwiseUnarySupported(infos[0],
415  infos[1],
416  *(PolymorphicDowncast<const
417  ElementwiseUnaryDescriptor*>(&descriptor)),
418  reasonIfUnsupported);
419  case LayerType::Fill:
420  return support.IsFillSupported(infos[0],
421  infos[1],
422  *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)),
423  reasonIfUnsupported);
424  case LayerType::Floor:
425  return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported);
426  case LayerType::FullyConnected:
427  return support.IsFullyConnectedSupported(infos[0],
428  infos[1],
429  infos[2],
430  infos[3],
431  *(PolymorphicDowncast<const
432  FullyConnectedDescriptor*>(&descriptor)),
433  reasonIfUnsupported);
434  case LayerType::Fused:
435  {
436  auto fusedDescriptor = *(PolymorphicDowncast<const FusedDescriptor*>(&descriptor));
437  if (fusedDescriptor.m_NumInputSlots + fusedDescriptor.m_NumOutputSlots != infos.size())
438  {
439  throw InvalidArgumentException("Invalid number of FusedLayer TensorInfos.");
440  }
441 
442  auto it = infos.begin() + numeric_cast<TensorInfo::DifferenceType>(fusedDescriptor.m_NumInputSlots);
443  std::vector<TensorInfo> inputInfos(infos.begin(), it);
444  std::vector<TensorInfo> outputInfos(it, infos.end());
445 
446  return support.IsFusedSupported({inputInfos.begin(), inputInfos.end()},
447  {outputInfos.begin(), outputInfos.end()},
448  fusedDescriptor,
449  reasonIfUnsupported);
450  }
451  case LayerType::Gather:
452  return support.IsGatherSupported(infos[0],
453  infos[1],
454  infos[2],
455  *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)),
456  reasonIfUnsupported);
457  case LayerType::GatherNd:
458  return support.IsGatherNdSupported(infos[0],
459  infos[1],
460  infos[2],
461  reasonIfUnsupported);
462  case LayerType::Input:
463  return support.IsInputSupported(infos[0], reasonIfUnsupported);
464  case LayerType::InstanceNormalization:
465  return support.IsInstanceNormalizationSupported(infos[0],
466  infos[1],
467  *(PolymorphicDowncast<const
468  InstanceNormalizationDescriptor*>(&descriptor)),
469  reasonIfUnsupported);
470  case LayerType::L2Normalization:
471  return support.IsL2NormalizationSupported(infos[0],
472  infos[1],
473  *(PolymorphicDowncast<const
474  L2NormalizationDescriptor*>(&descriptor)),
475  reasonIfUnsupported);
476  case LayerType::LogicalBinary:
477  return support.IsLogicalBinarySupported(infos[0],
478  infos[1],
479  infos[2],
480  *(PolymorphicDowncast<const
481  LogicalBinaryDescriptor*>(&descriptor)),
482  reasonIfUnsupported);
483  case LayerType::LogSoftmax:
484  return support.IsLogSoftmaxSupported(infos[0],
485  infos[1],
486  *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)),
487  reasonIfUnsupported);
488  case LayerType::Lstm:
489  return support.IsLstmSupported(infos[0],
490  infos[1],
491  infos[2],
492  infos[3],
493  infos[4],
494  infos[5],
495  infos[6],
496  *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)),
497  lstmParamsInfo.value(),
498  reasonIfUnsupported);
499  case LayerType::Map:
500  return true;
501  case LayerType::Maximum:
502  return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
503  case LayerType::Mean:
504  return support.IsMeanSupported(infos[0],
505  infos[1],
506  *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)),
507  reasonIfUnsupported);
508  case LayerType::MemCopy:
509  return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported);
510  case LayerType::MemImport:
511  return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported);
512  case LayerType::Merge:
513  return support.IsMergeSupported(infos[0],
514  infos[1],
515  infos[2],
516  reasonIfUnsupported);
517  case LayerType::Minimum:
518  return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
519  case LayerType::Multiplication:
520  return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
521  case LayerType::Normalization:
522  return support.IsNormalizationSupported(infos[0],
523  infos[1],
524  *(PolymorphicDowncast<const
525  NormalizationDescriptor*>(&descriptor)),
526  reasonIfUnsupported);
527  case LayerType::Output:
528  return support.IsOutputSupported(infos[0], reasonIfUnsupported);
529  case LayerType::Pad:
530  return support.IsPadSupported(infos[0],
531  infos[1],
532  *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)),
533  reasonIfUnsupported);
534  case LayerType::Permute:
535  return support.IsPermuteSupported(infos[0],
536  infos[1],
537  *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)),
538  reasonIfUnsupported);
539  case LayerType::Pooling2d:
540  return support.IsPooling2dSupported(infos[0],
541  infos[1],
542  *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
543  reasonIfUnsupported);
544  case LayerType::Pooling3d:
545  return support.IsPooling3dSupported(infos[0],
546  infos[1],
547  *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
548  reasonIfUnsupported);
549  case LayerType::Prelu:
550  return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
551  case LayerType::QLstm:
552  return support.IsQLstmSupported(infos[0],
553  infos[1],
554  infos[2],
555  infos[3],
556  infos[4],
557  infos[5],
558  *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)),
559  lstmParamsInfo.value(),
560  reasonIfUnsupported);
561  case LayerType::Quantize:
562  return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported);
563  case LayerType::QuantizedLstm:
564  return support.IsQuantizedLstmSupported(infos[0],
565  infos[1],
566  infos[2],
567  infos[3],
568  infos[4],
569  quantizedLstmParamsInfo.value(),
570  reasonIfUnsupported);
571  case LayerType::Rank:
572  return true;
573  case LayerType::Reshape:
574  return support.IsReshapeSupported(infos[0],
575  infos[1],
576  *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)),
577  reasonIfUnsupported);
578  case LayerType::Resize:
579  return support.IsResizeSupported(infos[0],
580  infos[1],
581  *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
582  reasonIfUnsupported);
583  case LayerType::Reduce:
584  return support.IsReduceSupported(infos[0],
585  infos[1],
586  *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
587  reasonIfUnsupported);
588  case LayerType::ReverseV2:
589  return support.IsReverseV2Supported(infos[0],
590  infos[1],
591  infos[2],
592  reasonIfUnsupported);
593  case LayerType::Shape:
594  return support.IsShapeSupported(infos[0],
595  infos[1],
596  reasonIfUnsupported);
597  case LayerType::Slice:
598  return support.IsSliceSupported(infos[0],
599  infos[1],
600  *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)),
601  reasonIfUnsupported);
602  case LayerType::Softmax:
603  return support.IsSoftmaxSupported(infos[0],
604  infos[1],
605  *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)),
606  reasonIfUnsupported);
607  case LayerType::SpaceToBatchNd:
608  return support.IsSpaceToBatchNdSupported(infos[0],
609  infos[1],
610  *(PolymorphicDowncast<const
611  SpaceToBatchNdDescriptor*>(&descriptor)),
612  reasonIfUnsupported);
613  case LayerType::SpaceToDepth:
614  return support.IsSpaceToDepthSupported(infos[0],
615  infos[1],
616  *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)),
617  reasonIfUnsupported);
618  case LayerType::Splitter:
619  {
620  std::vector<TensorInfo> outputInfos;
621  for (uint32_t i = 1; i < infos.size(); i++)
622  {
623  outputInfos.push_back(infos[i]);
624  }
625  return support.IsSplitterSupported(infos[0],
626  {outputInfos.begin(), outputInfos.end()},
627  *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)),
628  reasonIfUnsupported);
629  }
630  case LayerType::Stack:
631  {
632  std::vector<const TensorInfo*> inputInfos;
633  for (uint32_t i = 0; i < infos.size() - 1; i++)
634  {
635  inputInfos.push_back(&infos[i]);
636  }
637  return support.IsStackSupported(inputInfos,
638  infos[infos.size() - 1],
639  *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)),
640  reasonIfUnsupported);
641  }
642  case LayerType::StridedSlice:
643  return support.IsStridedSliceSupported(infos[0],
644  infos[1],
645  *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)),
646  reasonIfUnsupported);
647  case LayerType::Subtraction:
648  return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
649  case LayerType::Tile:
650  return support.IsTileSupported(infos[0],
651  infos[1],
652  *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)),
653  reasonIfUnsupported);
654  case LayerType::Transpose:
655  return support.IsTransposeSupported(infos[0],
656  infos[1],
657  *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)),
658  reasonIfUnsupported);
659  case LayerType::TransposeConvolution2d:
660  {
661  if (infos.size() != 4)
662  {
663  throw InvalidArgumentException("Invalid number of TransposeConvolution2d TensorInfos. "
664  "TensorInfos should be of format: {input, output, weights, biases}.");
665  }
666 
667  auto desc = *(PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor));
668  if (infos[3] == TensorInfo())
669  {
670  return support.IsTransposeConvolution2dSupported(infos[0],
671  infos[1],
672  desc,
673  infos[2],
674  EmptyOptional(),
675  reasonIfUnsupported);
676  }
677  else
678  {
679  return support.IsTransposeConvolution2dSupported(infos[0],
680  infos[1],
681  desc,
682  infos[2],
683  infos[3],
684  reasonIfUnsupported);
685  }
686  }
687  case LayerType::UnidirectionalSequenceLstm:
688  {
689  auto desc = *(PolymorphicDowncast<const UnidirectionalSequenceLstmDescriptor*>(&descriptor));
690  return support.IsUnidirectionalSequenceLstmSupported(infos[0],
691  infos[1],
692  infos[2],
693  infos[3],
694  infos[4],
695  infos[5],
696  desc,
697  lstmParamsInfo.value(),
698  reasonIfUnsupported);
699  }
700  case LayerType::Unmap:
701  return true;
702  default:
703  // layers not supported in neon by default:
704  // debug, fakequantization, precompiled,
705  // standin, switch
706  return false;
707  }
708 }

References Activation, Add, Addition, ArgMinMax, BatchMatMul, BatchNormalization, BatchToSpaceNd, Cast, ChannelShuffle, Comparison, Concat, Constant, ConvertFp16ToFp32, ConvertFp32ToFp16, Convolution2d, Convolution3d, DepthToSpace, DepthwiseConvolution2d, Dequantize, DetectionPostProcess, Div, Division, ElementwiseBinary, ElementwiseUnary, Fill, Floor, FORWARD_WORKLOAD_VALIDATE_FUNC, FullyConnected, Fused, Gather, GatherNd, Input, InstanceNormalization, NeonLayerSupport::IsActivationSupported(), NeonLayerSupport::IsAdditionSupported(), NeonLayerSupport::IsArgMinMaxSupported(), NeonLayerSupport::IsBatchMatMulSupported(), NeonLayerSupport::IsBatchNormalizationSupported(), NeonLayerSupport::IsBatchToSpaceNdSupported(), NeonLayerSupport::IsCastSupported(), NeonLayerSupport::IsChannelShuffleSupported(), NeonLayerSupport::IsComparisonSupported(), NeonLayerSupport::IsConcatSupported(), NeonLayerSupport::IsConstantSupported(), NeonLayerSupport::IsConvertFp16ToFp32Supported(), NeonLayerSupport::IsConvertFp32ToFp16Supported(), NeonLayerSupport::IsConvolution2dSupported(), NeonLayerSupport::IsConvolution3dSupported(), NeonLayerSupport::IsDepthToSpaceSupported(), NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDequantizeSupported(), LayerSupportBase::IsDetectionPostProcessSupported(), NeonLayerSupport::IsDivisionSupported(), NeonLayerSupport::IsElementwiseUnarySupported(), NeonLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), NeonLayerSupport::IsFullyConnectedSupported(), NeonLayerSupport::IsFusedSupported(), NeonLayerSupport::IsGatherNdSupported(), NeonLayerSupport::IsGatherSupported(), NeonLayerSupport::IsInputSupported(), NeonLayerSupport::IsInstanceNormalizationSupported(), NeonLayerSupport::IsL2NormalizationSupported(), NeonLayerSupport::IsLogicalBinarySupported(), NeonLayerSupport::IsLogSoftmaxSupported(), NeonLayerSupport::IsLstmSupported(), NeonLayerSupport::IsMaximumSupported(), NeonLayerSupport::IsMeanSupported(), LayerSupportBase::IsMemCopySupported(), LayerSupportBase::IsMemImportSupported(), LayerSupportBase::IsMergeSupported(), NeonLayerSupport::IsMinimumSupported(), NeonLayerSupport::IsMultiplicationSupported(), NeonLayerSupport::IsNormalizationSupported(), NeonLayerSupport::IsOutputSupported(), NeonLayerSupport::IsPadSupported(), NeonLayerSupport::IsPermuteSupported(), NeonLayerSupport::IsPooling2dSupported(), NeonLayerSupport::IsPooling3dSupported(), NeonLayerSupport::IsPreluSupported(), NeonLayerSupport::IsQLstmSupported(), NeonLayerSupport::IsQuantizedLstmSupported(), NeonLayerSupport::IsQuantizeSupported(), NeonLayerSupport::IsReduceSupported(), NeonLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsResizeSupported(), NeonLayerSupport::IsReverseV2Supported(), LayerSupportBase::IsShapeSupported(), NeonLayerSupport::IsSliceSupported(), NeonLayerSupport::IsSoftmaxSupported(), NeonLayerSupport::IsSpaceToBatchNdSupported(), NeonLayerSupport::IsSpaceToDepthSupported(), NeonLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsStackSupported(), NeonLayerSupport::IsStridedSliceSupported(), NeonLayerSupport::IsSubtractionSupported(), NeonLayerSupport::IsTileSupported(), NeonLayerSupport::IsTransposeConvolution2dSupported(), NeonLayerSupport::IsTransposeSupported(), NeonLayerSupport::IsUnidirectionalSequenceLstmSupported(), L2Normalization, LogicalBinary, LogSoftmax, Lstm, Map, Maximum, Mean, MemCopy, MemImport, Merge, Minimum, Mul, Multiplication, NeonAdditionWorkloadValidate(), NeonDivisionWorkloadValidate(), 
NeonElementwiseBinaryWorkloadValidate(), NeonMaximumWorkloadValidate(), NeonMinimumWorkloadValidate(), NeonMultiplicationWorkloadValidate(), NeonSubtractionWorkloadValidate(), Normalization, Output, Pad, Permute, PolymorphicDowncast(), Pooling2d, Pooling3d, Power, Prelu, QLstm, Quantize, QuantizedLstm, Rank, Reduce, Reshape, Resize, ReverseV2, Shape, Slice, Softmax, SpaceToBatchNd, SpaceToDepth, Splitter, SqDiff, Stack, StridedSlice, Sub, Subtraction, Tile, Transpose, TransposeConvolution2d, UnidirectionalSequenceLstm, Unmap, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by NeonLayerSupport::IsLayerSupported().

◆ IsNCHW()

bool armnn::IsNCHW ( armnn::Layer &  layer)
inline

Definition at line 213 of file SubgraphUtils.hpp.

214 {
215  CheckForNCHW check;
216  layer.ExecuteStrategy(check);
217  return check.Result();
218 }

References Layer::ExecuteStrategy().

Referenced by ConnectedToLayerWithNCHW().

◆ IsOperationQueueDescriptor() [1/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const ConstantQueueDescriptor &  )
constexpr

Definition at line 22 of file RefWorkloadFactory.hpp.

22 { return false; }

◆ IsOperationQueueDescriptor() [2/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const MemCopyQueueDescriptor &  )
constexpr

Definition at line 20 of file RefWorkloadFactory.hpp.

20 { return false; }

◆ IsOperationQueueDescriptor() [3/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const PermuteQueueDescriptor &  )
constexpr

Definition at line 24 of file RefWorkloadFactory.hpp.

24 { return false; }

◆ IsOperationQueueDescriptor() [4/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const QueueDescriptorType &  )
constexpr

Definition at line 18 of file RefWorkloadFactory.hpp.

18 { return true; }
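
Taken together, these overloads let templated factory code decide whether a workload's behaviour depends on the tensor data type. A hedged sketch (NeedsDataTypeDispatch is a hypothetical helper, not part of Arm NN):

    template <typename QueueDescriptorType>
    bool NeedsDataTypeDispatch(const QueueDescriptorType& descriptor)
    {
        // MemCopy, Constant and Permute resolve to the 'false' overloads above;
        // every other queue descriptor resolves to the generic 'true' overload.
        return armnn::IsOperationQueueDescriptor(descriptor);
    }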

◆ IsQAsymmS8()

bool armnn::IsQAsymmS8 ( const WorkloadInfo &  info)

Definition at line 72 of file RefWorkloadFactory.cpp.

73 {
74  return IsDataType<DataType::QAsymmS8>(info);
75 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQAsymmU8()

bool armnn::IsQAsymmU8 ( const WorkloadInfo &  info)

Definition at line 76 of file RefWorkloadFactory.cpp.

77 {
78  return IsDataType<DataType::QAsymmU8>(info);
79 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS16()

bool armnn::IsQSymmS16 ( const WorkloadInfo &  info)

Definition at line 64 of file RefWorkloadFactory.cpp.

65 {
66  return IsDataType<DataType::QSymmS16>(info);
67 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQSymmS8()

bool armnn::IsQSymmS8 ( const WorkloadInfo &  info)

Definition at line 68 of file RefWorkloadFactory.cpp.

69 {
70  return IsDataType<DataType::QSymmS8>(info);
71 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsQuantized8BitType()

constexpr bool armnn::IsQuantized8BitType ( DataType  dataType)
constexpr

Definition at line 316 of file TypesUtils.hpp.

317 {
318  return dataType == DataType::QAsymmU8 ||
319  dataType == DataType::QAsymmS8 ||
320  dataType == DataType::QSymmS8;
321 }

References QAsymmS8, QAsymmU8, and QSymmS8.

Referenced by RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), IsQuantizedType(), and RefLayerSupport::IsTransposeConvolution2dSupported().

◆ IsQuantizedType() [1/2]

constexpr bool armnn::IsQuantizedType ( )
constexpr

◆ IsQuantizedType() [2/2]

constexpr bool armnn::IsQuantizedType ( DataType  dataType)
constexpr

Definition at line 323 of file TypesUtils.hpp.

324 {
325  return dataType == DataType::QSymmS16 || IsQuantized8BitType(dataType);
326 }

References IsQuantized8BitType(), and QSymmS16.
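
Both helpers are constexpr, so they can back compile-time checks as well as runtime branches. For example, using the public armnn/TypesUtils.hpp header:

    static_assert(armnn::IsQuantized8BitType(armnn::DataType::QAsymmU8),
                  "QAsymmU8 is an 8-bit quantized type");
    static_assert(!armnn::IsQuantized8BitType(armnn::DataType::QSymmS16),
                  "QSymmS16 is quantized, but not 8-bit");
    static_assert(armnn::IsQuantizedType(armnn::DataType::QSymmS16),
                  "QSymmS16 still counts as a quantized type");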

◆ IsReadyForSplitAssignment()

bool armnn::IsReadyForSplitAssignment ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 374 of file SubgraphViewSelector.cpp.

375 {
376  bool ready = true;
377  ForEachLayerInput(layerInfos, layerInfo,
378  [&ready](LayerSelectionInfo& parentInfo)
379  {
380  if (!parentInfo.m_IsProcessed)
381  {
382  ready = false;
383  }
384  });
385  return ready;
386 }

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

◆ IsSequenceLayerType() [1/2]

bool armnn::IsSequenceLayerType ( Layer layer,
BinaryOperation  type 
)
inline

Definition at line 367 of file SubgraphUtils.hpp.

368 {
369  return (layer.GetType() == LayerType::ElementwiseBinary) &&
370  (PolymorphicDowncast<ElementwiseBinaryLayer*>(&layer)->GetParameters().m_Operation == type);
371 }

References ElementwiseBinary, and Layer::GetType().

◆ IsSequenceLayerType() [2/2]

bool armnn::IsSequenceLayerType ( Layer layer,
LayerType  type 
)
inline

Definition at line 362 of file SubgraphUtils.hpp.

363 {
364  return layer.GetType() == type;
365 }

References Layer::GetType().

Referenced by BuildAddMulAddTensorInfoLists().
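
A brief sketch of how the two overloads are used when matching fused-layer patterns ('layer' is an assumed armnn::Layer& from an internal graph traversal):

    // Does the layer have the expected LayerType?
    bool isElementwiseBinary = armnn::IsSequenceLayerType(layer, armnn::LayerType::ElementwiseBinary);

    // Is it specifically an elementwise Mul?
    bool isMulStep           = armnn::IsSequenceLayerType(layer, armnn::BinaryOperation::Mul);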

◆ IsSigned32()

bool armnn::IsSigned32 ( const WorkloadInfo &  info)

Definition at line 52 of file RefWorkloadFactory.cpp.

53 {
54  return IsDataType<DataType::Signed32>(info);
55 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSigned64()

bool armnn::IsSigned64 ( const WorkloadInfo &  info)

Definition at line 48 of file RefWorkloadFactory.cpp.

49 {
50  return IsDataType<DataType::Signed64>(info);
51 }

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

◆ IsSupportedForDataTypeGeneric()

bool armnn::IsSupportedForDataTypeGeneric ( Optional< std::string & >  reasonIfUnsupported,
DataType  dataType,
Float16Func  float16FuncPtr,
Float32Func  float32FuncPtr,
Uint8Func  uint8FuncPtr,
Int32Func  int32FuncPtr,
BooleanFunc  booleanFuncPtr,
Params &&...  params 
)

Definition at line 27 of file LayerSupportCommon.hpp.

35 {
36  switch(dataType)
37  {
38  case DataType::Float16:
39  return float16FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
40  case DataType::Float32:
41  return float32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
42  case DataType::QAsymmU8:
43  return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
44  case DataType::Signed32:
45  return int32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
46  case DataType::Boolean:
47  return booleanFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
48  default:
49  return false;
50  }
51 }

References Boolean, Float16, Float32, QAsymmU8, and Signed32.

Referenced by RefLayerSupport::IsConvertFp16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToFp16Supported(), and NeonLayerSupport::IsFloorSupported().
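
A hedged sketch modelled on the backend usage referenced above: a Floor-style support check that accepts Float32 only. TrueFunc and the FalseFunc* helpers are assumed to be the small utilities defined alongside this function in LayerSupportCommon.hpp (an internal header):

    bool IsMyOpSupported(armnn::DataType dataType,
                         armnn::Optional<std::string&> reasonIfUnsupported)
    {
        return armnn::IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                                    dataType,
                                                    &armnn::FalseFuncF16<>,   // Float16
                                                    &armnn::TrueFunc<>,       // Float32
                                                    &armnn::FalseFuncU8<>,    // QAsymmU8
                                                    &armnn::FalseFuncI32<>,   // Signed32
                                                    &armnn::FalseFunc<>);     // Boolean
    }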

◆ LayerEnumOf() [1/78]

constexpr LayerType armnn::LayerEnumOf ( const ActivationLayer )
constexpr

Definition at line 114 of file LayersFwd.hpp.

◆ LayerEnumOf() [2/78]

constexpr LayerType armnn::LayerEnumOf ( const AdditionLayer )
constexpr

Definition at line 115 of file LayersFwd.hpp.

◆ LayerEnumOf() [3/78]

constexpr LayerType armnn::LayerEnumOf ( const ArgMinMaxLayer )
constexpr

Definition at line 116 of file LayersFwd.hpp.

◆ LayerEnumOf() [4/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchMatMulLayer )
constexpr

Definition at line 117 of file LayersFwd.hpp.

◆ LayerEnumOf() [5/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchNormalizationLayer )
constexpr

Definition at line 118 of file LayersFwd.hpp.

◆ LayerEnumOf() [6/78]

constexpr LayerType armnn::LayerEnumOf ( const BatchToSpaceNdLayer )
constexpr

Definition at line 119 of file LayersFwd.hpp.

◆ LayerEnumOf() [7/78]

constexpr LayerType armnn::LayerEnumOf ( const BroadcastToLayer )
constexpr

Definition at line 120 of file LayersFwd.hpp.

◆ LayerEnumOf() [8/78]

constexpr LayerType armnn::LayerEnumOf ( const CastLayer )
constexpr

Definition at line 121 of file LayersFwd.hpp.

◆ LayerEnumOf() [9/78]

constexpr LayerType armnn::LayerEnumOf ( const ChannelShuffleLayer )
constexpr

Definition at line 122 of file LayersFwd.hpp.

◆ LayerEnumOf() [10/78]

constexpr LayerType armnn::LayerEnumOf ( const ComparisonLayer )
constexpr

Definition at line 123 of file LayersFwd.hpp.

◆ LayerEnumOf() [11/78]

constexpr LayerType armnn::LayerEnumOf ( const ConcatLayer )
constexpr

Definition at line 124 of file LayersFwd.hpp.

◆ LayerEnumOf() [12/78]

constexpr LayerType armnn::LayerEnumOf ( const ConstantLayer )
constexpr

Definition at line 125 of file LayersFwd.hpp.

◆ LayerEnumOf() [13/78]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp16ToFp32Layer )
constexpr

Definition at line 126 of file LayersFwd.hpp.

◆ LayerEnumOf() [14/78]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToFp16Layer )
constexpr

Definition at line 127 of file LayersFwd.hpp.

◆ LayerEnumOf() [15/78]

constexpr LayerType armnn::LayerEnumOf ( const Convolution2dLayer )
constexpr

Definition at line 128 of file LayersFwd.hpp.

◆ LayerEnumOf() [16/78]

constexpr LayerType armnn::LayerEnumOf ( const Convolution3dLayer )
constexpr

Definition at line 129 of file LayersFwd.hpp.

◆ LayerEnumOf() [17/78]

constexpr LayerType armnn::LayerEnumOf ( const DebugLayer )
constexpr

Definition at line 130 of file LayersFwd.hpp.

◆ LayerEnumOf() [18/78]

constexpr LayerType armnn::LayerEnumOf ( const DepthToSpaceLayer )
constexpr

Definition at line 131 of file LayersFwd.hpp.

◆ LayerEnumOf() [19/78]

constexpr LayerType armnn::LayerEnumOf ( const DepthwiseConvolution2dLayer )
constexpr

Definition at line 132 of file LayersFwd.hpp.

◆ LayerEnumOf() [20/78]

constexpr LayerType armnn::LayerEnumOf ( const DequantizeLayer )
constexpr

Definition at line 133 of file LayersFwd.hpp.

◆ LayerEnumOf() [21/78]

constexpr LayerType armnn::LayerEnumOf ( const DetectionPostProcessLayer )
constexpr

Definition at line 134 of file LayersFwd.hpp.

◆ LayerEnumOf() [22/78]

constexpr LayerType armnn::LayerEnumOf ( const DivisionLayer )
constexpr

Definition at line 135 of file LayersFwd.hpp.

◆ LayerEnumOf() [23/78]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseBinaryLayer )
constexpr

Definition at line 136 of file LayersFwd.hpp.

◆ LayerEnumOf() [24/78]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseUnaryLayer )
constexpr

Definition at line 137 of file LayersFwd.hpp.

◆ LayerEnumOf() [25/78]

constexpr LayerType armnn::LayerEnumOf ( const FakeQuantizationLayer )
constexpr

Definition at line 138 of file LayersFwd.hpp.

◆ LayerEnumOf() [26/78]

constexpr LayerType armnn::LayerEnumOf ( const FillLayer )
constexpr

Definition at line 139 of file LayersFwd.hpp.

◆ LayerEnumOf() [27/78]

constexpr LayerType armnn::LayerEnumOf ( const FloorLayer )
constexpr

Definition at line 140 of file LayersFwd.hpp.

◆ LayerEnumOf() [28/78]

constexpr LayerType armnn::LayerEnumOf ( const FullyConnectedLayer )
constexpr

Definition at line 141 of file LayersFwd.hpp.

◆ LayerEnumOf() [29/78]

constexpr LayerType armnn::LayerEnumOf ( const FusedLayer )
constexpr

Definition at line 142 of file LayersFwd.hpp.

◆ LayerEnumOf() [30/78]

constexpr LayerType armnn::LayerEnumOf ( const GatherLayer )
constexpr

Definition at line 143 of file LayersFwd.hpp.

◆ LayerEnumOf() [31/78]

constexpr LayerType armnn::LayerEnumOf ( const GatherNdLayer )
constexpr

Definition at line 144 of file LayersFwd.hpp.

◆ LayerEnumOf() [32/78]

constexpr LayerType armnn::LayerEnumOf ( const InputLayer )
constexpr

Definition at line 145 of file LayersFwd.hpp.

◆ LayerEnumOf() [33/78]

constexpr LayerType armnn::LayerEnumOf ( const InstanceNormalizationLayer )
constexpr

Definition at line 146 of file LayersFwd.hpp.

◆ LayerEnumOf() [34/78]

constexpr LayerType armnn::LayerEnumOf ( const L2NormalizationLayer )
constexpr

Definition at line 147 of file LayersFwd.hpp.

◆ LayerEnumOf() [35/78]

constexpr LayerType armnn::LayerEnumOf ( const LogicalBinaryLayer )
constexpr

Definition at line 148 of file LayersFwd.hpp.

◆ LayerEnumOf() [36/78]

constexpr LayerType armnn::LayerEnumOf ( const LogSoftmaxLayer )
constexpr

Definition at line 149 of file LayersFwd.hpp.

◆ LayerEnumOf() [37/78]

constexpr LayerType armnn::LayerEnumOf ( const LstmLayer )
constexpr

Definition at line 150 of file LayersFwd.hpp.

◆ LayerEnumOf() [38/78]

constexpr LayerType armnn::LayerEnumOf ( const MapLayer )
constexpr

Definition at line 151 of file LayersFwd.hpp.

◆ LayerEnumOf() [39/78]

constexpr LayerType armnn::LayerEnumOf ( const MaximumLayer )
constexpr

Definition at line 152 of file LayersFwd.hpp.

◆ LayerEnumOf() [40/78]

constexpr LayerType armnn::LayerEnumOf ( const MeanLayer )
constexpr

Definition at line 153 of file LayersFwd.hpp.

◆ LayerEnumOf() [41/78]

constexpr LayerType armnn::LayerEnumOf ( const MemCopyLayer )
constexpr

Definition at line 154 of file LayersFwd.hpp.

◆ LayerEnumOf() [42/78]

constexpr LayerType armnn::LayerEnumOf ( const MemImportLayer )
constexpr

Definition at line 155 of file LayersFwd.hpp.

◆ LayerEnumOf() [43/78]

constexpr LayerType armnn::LayerEnumOf ( const MergeLayer )
constexpr

Definition at line 156 of file LayersFwd.hpp.

◆ LayerEnumOf() [44/78]

constexpr LayerType armnn::LayerEnumOf ( const MinimumLayer )
constexpr

Definition at line 157 of file LayersFwd.hpp.

◆ LayerEnumOf() [45/78]

constexpr LayerType armnn::LayerEnumOf ( const MultiplicationLayer )
constexpr

Definition at line 158 of file LayersFwd.hpp.

◆ LayerEnumOf() [46/78]

constexpr LayerType armnn::LayerEnumOf ( const NormalizationLayer )
constexpr

Definition at line 159 of file LayersFwd.hpp.

◆ LayerEnumOf() [47/78]

constexpr LayerType armnn::LayerEnumOf ( const OutputLayer )
constexpr

Definition at line 160 of file LayersFwd.hpp.

◆ LayerEnumOf() [48/78]

constexpr LayerType armnn::LayerEnumOf ( const PadLayer )
constexpr

Definition at line 161 of file LayersFwd.hpp.

◆ LayerEnumOf() [49/78]

constexpr LayerType armnn::LayerEnumOf ( const PermuteLayer )
constexpr

Definition at line 162 of file LayersFwd.hpp.

◆ LayerEnumOf() [50/78]

constexpr LayerType armnn::LayerEnumOf ( const Pooling2dLayer )
constexpr

Definition at line 163 of file LayersFwd.hpp.

◆ LayerEnumOf() [51/78]

constexpr LayerType armnn::LayerEnumOf ( const Pooling3dLayer )
constexpr

Definition at line 164 of file LayersFwd.hpp.

◆ LayerEnumOf() [52/78]

constexpr LayerType armnn::LayerEnumOf ( const PreCompiledLayer )
constexpr

Definition at line 165 of file LayersFwd.hpp.

◆ LayerEnumOf() [53/78]

constexpr LayerType armnn::LayerEnumOf ( const PreluLayer )
constexpr

Definition at line 166 of file LayersFwd.hpp.

◆ LayerEnumOf() [54/78]

constexpr LayerType armnn::LayerEnumOf ( const QLstmLayer )
constexpr

Definition at line 168 of file LayersFwd.hpp.

◆ LayerEnumOf() [55/78]

constexpr LayerType armnn::LayerEnumOf ( const QuantizedLstmLayer )
constexpr

Definition at line 169 of file LayersFwd.hpp.

◆ LayerEnumOf() [56/78]

constexpr LayerType armnn::LayerEnumOf ( const QuantizeLayer )
constexpr

Definition at line 167 of file LayersFwd.hpp.

◆ LayerEnumOf() [57/78]

constexpr LayerType armnn::LayerEnumOf ( const RankLayer )
constexpr

Definition at line 170 of file LayersFwd.hpp.

◆ LayerEnumOf() [58/78]

constexpr LayerType armnn::LayerEnumOf ( const ReduceLayer )
constexpr

Definition at line 171 of file LayersFwd.hpp.

◆ LayerEnumOf() [59/78]

constexpr LayerType armnn::LayerEnumOf ( const ReshapeLayer )
constexpr

Definition at line 172 of file LayersFwd.hpp.

◆ LayerEnumOf() [60/78]

constexpr LayerType armnn::LayerEnumOf ( const ResizeLayer )
constexpr

Definition at line 173 of file LayersFwd.hpp.

◆ LayerEnumOf() [61/78]

constexpr LayerType armnn::LayerEnumOf ( const ReverseV2Layer )
constexpr

Definition at line 174 of file LayersFwd.hpp.

◆ LayerEnumOf() [62/78]

constexpr LayerType armnn::LayerEnumOf ( const ShapeLayer )
constexpr

Definition at line 175 of file LayersFwd.hpp.

◆ LayerEnumOf() [63/78]

constexpr LayerType armnn::LayerEnumOf ( const SliceLayer )
constexpr

Definition at line 176 of file LayersFwd.hpp.

◆ LayerEnumOf() [64/78]

constexpr LayerType armnn::LayerEnumOf ( const SoftmaxLayer )
constexpr

Definition at line 177 of file LayersFwd.hpp.

◆ LayerEnumOf() [65/78]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToBatchNdLayer )
constexpr

Definition at line 178 of file LayersFwd.hpp.

◆ LayerEnumOf() [66/78]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToDepthLayer )
constexpr

Definition at line 179 of file LayersFwd.hpp.

◆ LayerEnumOf() [67/78]

constexpr LayerType armnn::LayerEnumOf ( const SplitterLayer )
constexpr

Definition at line 180 of file LayersFwd.hpp.

◆ LayerEnumOf() [68/78]

constexpr LayerType armnn::LayerEnumOf ( const StackLayer )
constexpr

Definition at line 181 of file LayersFwd.hpp.

◆ LayerEnumOf() [69/78]

constexpr LayerType armnn::LayerEnumOf ( const StandInLayer )
constexpr

Definition at line 182 of file LayersFwd.hpp.

◆ LayerEnumOf() [70/78]

constexpr LayerType armnn::LayerEnumOf ( const StridedSliceLayer )
constexpr

Definition at line 183 of file LayersFwd.hpp.

◆ LayerEnumOf() [71/78]

constexpr LayerType armnn::LayerEnumOf ( const SubtractionLayer )
constexpr

Definition at line 184 of file LayersFwd.hpp.

◆ LayerEnumOf() [72/78]

constexpr LayerType armnn::LayerEnumOf ( const SwitchLayer )
constexpr

Definition at line 185 of file LayersFwd.hpp.

◆ LayerEnumOf() [73/78]

constexpr LayerType armnn::LayerEnumOf ( const T *  = nullptr)
constexpr

◆ LayerEnumOf() [74/78]

constexpr LayerType armnn::LayerEnumOf ( const TileLayer )
constexpr

Definition at line 186 of file LayersFwd.hpp.

◆ LayerEnumOf() [75/78]

constexpr LayerType armnn::LayerEnumOf ( const TransposeConvolution2dLayer )
constexpr

Definition at line 188 of file LayersFwd.hpp.

◆ LayerEnumOf() [76/78]

constexpr LayerType armnn::LayerEnumOf ( const TransposeLayer )
constexpr

Definition at line 187 of file LayersFwd.hpp.

◆ LayerEnumOf() [77/78]

constexpr LayerType armnn::LayerEnumOf ( const UnidirectionalSequenceLstmLayer )
constexpr

Definition at line 189 of file LayersFwd.hpp.

◆ LayerEnumOf() [78/78]

constexpr LayerType armnn::LayerEnumOf ( const UnmapLayer )
constexpr

Definition at line 190 of file LayersFwd.hpp.
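
Each overload maps a concrete layer class to its LayerType enumerator at compile time. A minimal check, assuming LayersFwd.hpp (an internal header) is available:

    static_assert(armnn::LayerEnumOf<armnn::ActivationLayer>() == armnn::LayerType::Activation,
                  "A layer class maps back to its LayerType enumerator");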

◆ LevelToString()

std::string armnn::LevelToString ( LogSeverity  level)
inline

Definition at line 22 of file Logging.hpp.

23 {
24  switch(level)
25  {
26  case LogSeverity::Trace:
27  return "Trace";
28  case LogSeverity::Debug:
29  return "Debug";
30  case LogSeverity::Info:
31  return "Info";
32  case LogSeverity::Warning:
33  return "Warning";
34  case LogSeverity::Error:
35  return "Error";
36  case LogSeverity::Fatal:
37  return "Fatal";
38  default:
39  return "Log";
40  }
41 }

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by ScopedRecord::ScopedRecord().
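
A minimal usage sketch with the public armnn/Logging.hpp header:

    #include <armnn/Logging.hpp>
    #include <iostream>
    #include <string>

    void Report(armnn::LogSeverity level, const std::string& message)
    {
        std::cerr << "[" << armnn::LevelToString(level) << "] " << message << "\n";
    }

    // Report(armnn::LogSeverity::Warning, "tuning file not found");
    // prints: [Warning] tuning file not found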

◆ LogSoftmax()

void LogSoftmax ( Decoder< float > &  input,
Encoder< float > &  output,
const TensorInfo inputInfo,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 29 of file LogSoftmax.cpp.

33 {
34  const unsigned int numDimensions = inputInfo.GetNumDimensions();
35 
36  bool axisIsValid = ValidateAxis(descriptor.m_Axis, numDimensions);
37  ARMNN_ASSERT_MSG(axisIsValid,
38  "Axis index is not in range [-numDimensions, numDimensions).");
39  IgnoreUnused(axisIsValid);
40 
41  unsigned int uAxis = descriptor.m_Axis < 0 ?
42  numDimensions - armnn::numeric_cast<unsigned int>(std::abs(descriptor.m_Axis)) :
43  armnn::numeric_cast<unsigned int>(descriptor.m_Axis);
44 
45  const TensorShape& inputShape = inputInfo.GetShape();
46  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
47  const unsigned int axisSize = inputShape[uAxis];
48  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
49  uAxis + 1,
50  inputShape.GetNumDimensions());
51 
52  for (unsigned int outer = 0; outer < outerSize; ++outer)
53  {
54  for (unsigned int inner = 0; inner < innerSize; ++inner)
55  {
56  // Find max
57  input[outer * axisSize * innerSize + inner];
58  float maxValue = input.Get();
59  for (unsigned int i = 1u; i < axisSize; ++i)
60  {
61  input[(outer * axisSize + i) * innerSize + inner];
62  maxValue = std::max(maxValue, input.Get());
63  }
64 
65  // Compute sum
66  float sum = 0.0f;
67  for (unsigned int i = 0u; i < axisSize; ++i)
68  {
69  input[(outer * axisSize + i) * innerSize + inner];
70  sum += std::exp((input.Get() - maxValue) * descriptor.m_Beta);
71  }
72 
73  // Compute log sum
74  const float logSum = std::log(sum);
75 
76  // Compute result
77  for (unsigned int i = 0u; i < axisSize; ++i)
78  {
79  const unsigned int index = (outer * axisSize + i) * innerSize + inner;
80 
81  input [index];
82  output[index];
83 
84  output.Set((input.Get() - maxValue) * descriptor.m_Beta - logSum);
85  }
86  }
87  }
88 }

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), IgnoreUnused(), SoftmaxDescriptor::m_Axis, SoftmaxDescriptor::m_Beta, and Encoder< IType >::Set().
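
A hedged sketch of calling the reference implementation directly (Decoders.hpp, Encoders.hpp and LogSoftmax.hpp are internal reference-backend headers, so this only compiles inside the Arm NN source tree; <vector> is assumed):

    armnn::TensorInfo info({2, 4}, armnn::DataType::Float32);
    std::vector<float> in(info.GetNumElements(), 1.0f);
    std::vector<float> out(info.GetNumElements(), 0.0f);

    armnn::LogSoftmaxDescriptor descriptor;   // alias of SoftmaxDescriptor
    descriptor.m_Axis = -1;                   // reduce over the last dimension
    descriptor.m_Beta = 1.0f;

    auto decoder = armnn::MakeDecoder<float>(info, in.data());
    auto encoder = armnn::MakeEncoder<float>(info, out.data());
    armnn::LogSoftmax(*decoder, *encoder, info, descriptor);
    // With an all-ones input, every output element is log(1/4), roughly -1.386.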

◆ LstmImpl()

void LstmImpl ( const LstmDescriptor descriptor,
const TensorInfo inputInfo,
const TensorInfo outputInfo,
const TensorShape inputToOutputWeightsShape,
const TensorShape recurrentToOutputWeightsShape,
std::unique_ptr< Decoder< float >> &  inputData,
std::unique_ptr< Decoder< float >> &  outputStateIn,
std::unique_ptr< Decoder< float >> &  cellStateIn,
std::unique_ptr< Encoder< float >> &  outputStateOut,
std::unique_ptr< Encoder< float >> &  cellStateOut,
std::unique_ptr< Encoder< float >> &  output,
std::unique_ptr< Decoder< float >> &  cellStateOutDecoder,
std::unique_ptr< Decoder< float >> &  outputDecoder,
std::unique_ptr< Decoder< float >> &  inputToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  forgetGateBiasTensor,
std::unique_ptr< Decoder< float >> &  cellBiasTensor,
std::unique_ptr< Decoder< float >> &  outputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  projectionWeightsTensor,
std::unique_ptr< Decoder< float >> &  projectionBiasTensor,
std::unique_ptr< Decoder< float >> &  inputLayerNormWeights,
std::unique_ptr< Decoder< float >> &  forgetLayerNormWeights,
std::unique_ptr< Decoder< float >> &  cellLayerNormWeights,
std::unique_ptr< Decoder< float >> &  outputLayerNormWeights,
std::unique_ptr< Encoder< float >> &  inputGateScratch,
std::unique_ptr< Encoder< float >> &  cellScratch,
std::unique_ptr< Encoder< float >> &  forgetGateScratch,
std::unique_ptr< Encoder< float >> &  outputGateScratch,
std::unique_ptr< Decoder< float >> &  inputGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  cellScratchDecoder,
std::unique_ptr< Decoder< float >> &  forgetGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  outputGateScratchDecoder,
float  layerNormEpsilon 
)

Definition at line 13 of file Lstm.cpp.

56 {
57  // This is a porting of the LSTM::Eval() method in the Android code base
58  // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
59 
60  const TensorShape& inputShape = inputInfo.GetShape();
61  const DataType& outputType = outputInfo.GetDataType();
62 
63  const uint32_t nBatch = inputShape[0];
64  const uint32_t nInput = inputShape[1];
65 
66  const uint32_t nCell = inputToOutputWeightsShape[0];
67  const uint32_t nOutput = recurrentToOutputWeightsShape[1];
68 
69  const bool useCifg = descriptor.m_CifgEnabled;
70  const bool usePeephole = descriptor.m_PeepholeEnabled;
71  const bool useLayerNorm = descriptor.m_LayerNormEnabled;
72 
73  if (!useLayerNorm)
74  {
75  // Initialize scratch buffers with bias.
76  if (!useCifg)
77  {
78  VectorBatchVectorAssign(*inputGateBiasTensor,
79  nCell, nBatch, *inputGateScratch);
80  }
81  VectorBatchVectorAssign(*forgetGateBiasTensor,
82  nCell, nBatch, *forgetGateScratch);
83  VectorBatchVectorAssign(*cellBiasTensor,
84  nCell, nBatch, *cellScratch);
85  VectorBatchVectorAssign(*outputGateBiasTensor,
86  nCell, nBatch, *outputGateScratch);
87  }
88  else
89  {
90  // Initialize scratch buffers with zeroes.
91  if (!useCifg)
92  {
93  ZeroVector(*inputGateScratch, nCell * nBatch);
94  }
95  ZeroVector(*forgetGateScratch, nCell * nBatch);
96  ZeroVector(*cellScratch , nCell * nBatch);
97  ZeroVector(*outputGateScratch, nCell * nBatch);
98  }
99 
100  // For each batch and cell: compute input_weight * input.
101  if (!useCifg)
102  {
103  MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsTensor,
104  nCell, nInput, *inputData, nBatch, *inputGateScratch);
105  }
106  MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsTensor,
107  nCell, nInput, *inputData, nBatch, *forgetGateScratch);
108  MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsTensor,
109  nCell, nInput, *inputData, nBatch, *cellScratch);
110  MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsTensor,
111  nCell, nInput, *inputData, nBatch, *outputGateScratch);
112 
113  // For each batch and cell: compute recurrent_weight * output_state.
114  if (!useCifg)
115  {
116  MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsTensor,
117  nCell, nOutput, *outputStateIn, nBatch, *inputGateScratch);
118  }
119  MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsTensor,
120  nCell, nOutput, *outputStateIn, nBatch, *forgetGateScratch);
121  MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsTensor,
122  nCell, nOutput, *outputStateIn, nBatch, *cellScratch);
123  MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsTensor,
124  nCell, nOutput, *outputStateIn, nBatch, *outputGateScratch);
125 
126  // For each batch and cell: update input gate.
127  if (!useCifg)
128  {
129  if (usePeephole)
130  {
131  VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
132  nCell, *cellStateIn, nBatch, *inputGateScratch);
133  }
134  if (useLayerNorm)
135  {
136  MeanStddevNormalization(*inputGateScratchDecoder,
137  *inputGateScratch, nCell, nBatch, layerNormEpsilon);
138  VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
139  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
140  VectorBatchVectorAdd(*inputGateBiasTensor,
141  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
142  }
143  Activation(*inputGateScratchDecoder, *inputGateScratch,
144  TensorInfo({nCell, nBatch}, outputType),
145  ActivationFunction::Sigmoid, 0, 0);
146  }
147 
148  // For each batch and cell: update forget gate.
149  if (usePeephole)
150  {
151  VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
152  *cellStateIn, nBatch, *forgetGateScratch);
153  }
154  if (useLayerNorm)
155  {
156  MeanStddevNormalization(*forgetGateScratchDecoder,
157  *forgetGateScratch, nCell, nBatch, layerNormEpsilon);
158  VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
159  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
160  VectorBatchVectorAdd(*forgetGateBiasTensor,
161  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
162  }
163  Activation(*forgetGateScratchDecoder, *forgetGateScratch,
164  TensorInfo({nCell, nBatch}, outputType),
165  ActivationFunction::Sigmoid, 0, 0);
166 
167  // For each batch and cell: update the cell.
168  if (useLayerNorm)
169  {
170  MeanStddevNormalization(*cellScratchDecoder,
171  *cellScratch, nCell, nBatch, layerNormEpsilon);
172  VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
173  nCell, *cellScratchDecoder, nBatch, *cellScratch);
174  VectorBatchVectorAdd(*cellBiasTensor,
175  nCell, *cellScratchDecoder, nBatch, *cellScratch);
176  }
177 
178  VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
179 
180  ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
181  float a = 0;
182  float b = 0;
183  SetActivationParameters(descriptor.m_ActivationFunc, armnnActivationFunc, a, b);
184 
185  if (descriptor.m_ActivationFunc > 0)
186  {
187  Activation(*cellScratchDecoder, *cellScratch,
188  TensorInfo({nCell, nBatch}, outputType),
189  armnnActivationFunc, a, b);
190  }
191  if (useCifg)
192  {
193  Sub1Vector(*forgetGateScratchDecoder, nBatch * nCell, *forgetGateScratch);
194  VectorVectorCwiseProductAccumulate(
195  *cellScratchDecoder, *forgetGateScratchDecoder, nBatch * nCell, *cellStateOut);
196  }
197  else
198  {
199  VectorVectorCwiseProductAccumulate(
200  *cellScratchDecoder, *inputGateScratchDecoder, nBatch * nCell, *cellStateOut);
201  }
202  if (descriptor.m_ClippingThresCell > 0.0)
203  {
204  ClipVector(*cellStateOutDecoder, nBatch * nCell, descriptor.m_ClippingThresCell, *cellStateOut);
205  }
206 
207  // For each batch and cell: update the output gate.
208  if (usePeephole)
209  {
210  VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
211  nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
212  }
213  if (useLayerNorm)
214  {
215  MeanStddevNormalization(*outputGateScratchDecoder,
216  *outputGateScratch, nCell, nBatch, layerNormEpsilon);
217  VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
218  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
219  VectorBatchVectorAdd(*outputGateBiasTensor,
220  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
221  }
222  Activation(*outputGateScratchDecoder, *outputGateScratch,
223  TensorInfo({nCell, nBatch}, outputType),
224  ActivationFunction::Sigmoid, 0, 0);
225 
226  if (descriptor.m_ActivationFunc > 0)
227  {
228  Activation(*cellStateOutDecoder, *cellScratch,
229  TensorInfo({nCell, nBatch}, outputType),
230  armnnActivationFunc, a, b);
231  }
232 
233  VectorVectorCwiseProduct(*outputGateScratchDecoder, *cellScratchDecoder, nBatch * nCell, *outputGateScratch);
234 
235  // For each batch: update the projection and output_state.
236  if (descriptor.m_ProjectionEnabled)
237  {
238  if (projectionBiasTensor)
239  {
240  VectorBatchVectorAssign(*projectionBiasTensor,
241  nOutput, nBatch, *output);
242  }
243  MatrixBatchVectorMultiplyAccumulate(*projectionWeightsTensor,
244  nOutput, nCell, *outputGateScratchDecoder, nBatch, *output);
245 
246  if (descriptor.m_ClippingThresProj > 0.0)
247  {
248  ClipVector(*outputDecoder, nBatch * nOutput, descriptor.m_ClippingThresProj, *output);
249  }
250  }
251  else
252  {
253  CopyVector(*outputGateScratchDecoder, nBatch * nOutput, *output);
254  }
255 
256  CopyVector(*outputDecoder, nBatch * nOutput, *outputStateOut);
257 }

References Activation(), ClipVector(), CopyVector(), TensorInfo::GetDataType(), TensorInfo::GetShape(), LstmDescriptor::m_ActivationFunc, LstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, LstmDescriptor::m_LayerNormEnabled, LstmDescriptor::m_PeepholeEnabled, LstmDescriptor::m_ProjectionEnabled, MatrixBatchVectorMultiplyAccumulate(), MeanStddevNormalization(), SetActivationParameters(), Sigmoid, Sub1Vector(), VectorBatchVectorAdd(), VectorBatchVectorAssign(), VectorBatchVectorCwiseProduct(), VectorBatchVectorCwiseProductAccumulate(), VectorVectorCwiseProduct(), VectorVectorCwiseProductAccumulate(), and ZeroVector().

◆ MakeDecoder() [1/2]

std::unique_ptr< Decoder< int32_t > > MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 64 of file Decoders.hpp.

65 {
66  switch(info.GetDataType())
67  {
68  case DataType::QAsymmS8:
69  {
70  return std::make_unique<QASymmS8Decoder>(
71  static_cast<const int8_t*>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  case DataType::QAsymmU8:
76  {
77  return std::make_unique<QASymm8Decoder>(
78  static_cast<const uint8_t*>(data),
79  info.GetQuantizationScale(),
80  info.GetQuantizationOffset());
81  }
82  case DataType::QSymmS16:
83  {
84  return std::make_unique<QSymm16Decoder>(
85  static_cast<const int16_t*>(data),
86  info.GetQuantizationScale(),
87  info.GetQuantizationOffset());
88  }
89  case DataType::Float16:
90  {
91  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
92  }
93  case DataType::Float32:
94  {
95  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
96  }
97  case DataType::Signed32:
98  {
99  return MakeSigned32Decoder(info, data);
100  }
101  case DataType::QSymmS8:
102  {
103  if (info.HasPerAxisQuantization())
104  {
105  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
106  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
107  }
108  else
109  {
110  return std::make_unique<QSymmS8Decoder>(
111  static_cast<const int8_t*>(data),
112  info.GetQuantizationScale(),
113  info.GetQuantizationOffset());
114  }
115  }
116  case DataType::Boolean:
117  {
118  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
119  }
120  default:
121  {
122  throw InvalidArgumentException("Unsupported target Data Type!");
123  break;
124  }
125  }
126  return nullptr;
127 }

References Boolean, Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeDecoder() [2/2]

std::unique_ptr<Decoder<T> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data = nullptr 
)
inline

Definition at line 64 of file Decoders.hpp.

65 {
66  switch(info.GetDataType())
67  {
68  case DataType::QAsymmS8:
69  {
70  return std::make_unique<QASymmS8Decoder>(
71  static_cast<const int8_t*>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  case DataType::QAsymmU8:
76  {
77  return std::make_unique<QASymm8Decoder>(
78  static_cast<const uint8_t*>(data),
79  info.GetQuantizationScale(),
80  info.GetQuantizationOffset());
81  }
82  case DataType::QSymmS16:
83  {
84  return std::make_unique<QSymm16Decoder>(
85  static_cast<const int16_t*>(data),
86  info.GetQuantizationScale(),
87  info.GetQuantizationOffset());
88  }
89  case DataType::Float16:
90  {
91  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
92  }
93  case DataType::Float32:
94  {
95  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
96  }
97  case DataType::Signed32:
98  {
99  return MakeSigned32Decoder(info, data);
100  }
101  case DataType::QSymmS8:
102  {
103  if (info.HasPerAxisQuantization())
104  {
105  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
106  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
107  }
108  else
109  {
110  return std::make_unique<QSymmS8Decoder>(
111  static_cast<const int8_t*>(data),
112  info.GetQuantizationScale(),
113  info.GetQuantizationOffset());
114  }
115  }
116  case DataType::Boolean:
117  {
118  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
119  }
120  default:
121  {
122  throw InvalidArgumentException("Unsupported target Data Type!");
123  break;
124  }
125  }
126  return nullptr;
127 }

References Boolean, Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.
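
A hedged sketch of typical decoder use (Decoders.hpp is an internal reference-backend header): summing a tensor's elements as float, whatever the underlying data type.

    float SumElements(const armnn::TensorInfo& info, const void* data)
    {
        auto decoder = armnn::MakeDecoder<float>(info, data);
        float sum = 0.0f;
        for (unsigned int i = 0; i < info.GetNumElements(); ++i)
        {
            (*decoder)[i];          // position the decoder on element i
            sum += decoder->Get();  // read it back as float, dequantizing if needed
        }
        return sum;
    }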

◆ MakeEncoder() [1/2]

std::unique_ptr< Encoder< int32_t > > MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 19 of file Encoders.hpp.

20 {
21  switch(info.GetDataType())
22  {
23  case DataType::QAsymmS8:
24  {
25  return std::make_unique<QASymmS8Encoder>(
26  static_cast<int8_t*>(data),
27  info.GetQuantizationScale(),
28  info.GetQuantizationOffset());
29  }
30  case DataType::QAsymmU8:
31  {
32  return std::make_unique<QASymm8Encoder>(
33  static_cast<uint8_t*>(data),
34  info.GetQuantizationScale(),
35  info.GetQuantizationOffset());
36  }
37  case DataType::QSymmS8:
38  {
39  if (info.HasPerAxisQuantization())
40  {
41  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
42  return std::make_unique<QSymm8PerAxisEncoder>(
43  static_cast<int8_t*>(data),
44  params.second,
45  params.first);
46  }
47  else
48  {
49  return std::make_unique<QSymmS8Encoder>(
50  static_cast<int8_t*>(data),
51  info.GetQuantizationScale(),
52  info.GetQuantizationOffset());
53  }
54  }
55  case DataType::QSymmS16:
56  {
57  if (info.HasPerAxisQuantization())
58  {
59  unsigned int axis = info.GetQuantizationDim().value();
60  auto axisDimensionality = info.GetShape()[axis];
61  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
62  return std::make_unique<QSymm16PerAxisEncoder>(
63  static_cast<int16_t*>(data),
64  params.second,
65  params.first,
66  axisDimensionality);
67  }
68  else
69  {
70  return std::make_unique<QSymm16Encoder>(
71  static_cast<int16_t *>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  }
76  case DataType::Signed32:
77  {
78  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
79  }
80  case DataType::Float16:
81  {
82  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
83  }
84  case DataType::Float32:
85  {
86  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
87  }
88  default:
89  {
90  throw InvalidArgumentException("Unsupported target Data Type!");
91  break;
92  }
93  }
94  return nullptr;
95 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

◆ MakeEncoder() [2/2]

std::unique_ptr<Encoder<T> > armnn::MakeEncoder ( const TensorInfo info,
void *  data = nullptr 
)
inline

Definition at line 19 of file Encoders.hpp.

20 {
21  switch(info.GetDataType())
22  {
23  case DataType::QAsymmS8:
24  {
25  return std::make_unique<QASymmS8Encoder>(
26  static_cast<int8_t*>(data),
27  info.GetQuantizationScale(),
28  info.GetQuantizationOffset());
29  }
30  case DataType::QAsymmU8:
31  {
32  return std::make_unique<QASymm8Encoder>(
33  static_cast<uint8_t*>(data),
34  info.GetQuantizationScale(),
35  info.GetQuantizationOffset());
36  }
37  case DataType::QSymmS8:
38  {
39  if (info.HasPerAxisQuantization())
40  {
41  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
42  return std::make_unique<QSymm8PerAxisEncoder>(
43  static_cast<int8_t*>(data),
44  params.second,
45  params.first);
46  }
47  else
48  {
49  return std::make_unique<QSymmS8Encoder>(
50  static_cast<int8_t*>(data),
51  info.GetQuantizationScale(),
52  info.GetQuantizationOffset());
53  }
54  }
55  case DataType::QSymmS16:
56  {
57  if (info.HasPerAxisQuantization())
58  {
59  unsigned int axis = info.GetQuantizationDim().value();
60  auto axisDimensionality = info.GetShape()[axis];
61  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
62  return std::make_unique<QSymm16PerAxisEncoder>(
63  static_cast<int16_t*>(data),
64  params.second,
65  params.first,
66  axisDimensionality);
67  }
68  else
69  {
70  return std::make_unique<QSymm16Encoder>(
71  static_cast<int16_t *>(data),
72  info.GetQuantizationScale(),
73  info.GetQuantizationOffset());
74  }
75  }
76  case DataType::Signed32:
77  {
78  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
79  }
80  case DataType::Float16:
81  {
82  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
83  }
84  case DataType::Float32:
85  {
86  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
87  }
88  default:
89  {
90  throw InvalidArgumentException("Unsupported target Data Type!");
91  break;
92  }
93  }
94  return nullptr;
95 }

References Float16, Float32, armnnUtils::GetPerAxisParams(), info, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.
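
A matching sketch for the encoder side (Encoders.hpp is an internal reference-backend header): writing one float value into every element, quantizing on the way if the tensor is a quantized type.

    void FillWithValue(const armnn::TensorInfo& info, void* data, float value)
    {
        auto encoder = armnn::MakeEncoder<float>(info, data);
        for (unsigned int i = 0; i < info.GetNumElements(); ++i)
        {
            (*encoder)[i];        // position the encoder on element i
            encoder->Set(value);  // convert/quantize and store
        }
    }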

◆ MakeInfo()

arm_compute::DetectionPostProcessLayerInfo armnn::MakeInfo ( const DetectionPostProcessDescriptor descriptor)

Definition at line 17 of file NeonDetectionPostProcessWorkload.cpp.

18 {
19  return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
20  descriptor.m_MaxClassesPerDetection,
21  descriptor.m_NmsScoreThreshold,
22  descriptor.m_NmsIouThreshold,
23  descriptor.m_NumClasses,
24  { descriptor.m_ScaleX,
25  descriptor.m_ScaleY,
26  descriptor.m_ScaleW,
27  descriptor.m_ScaleH },
28  descriptor.m_UseRegularNms,
29  descriptor.m_DetectionsPerClass);
30 }

References DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, and DetectionPostProcessDescriptor::m_UseRegularNms.

Referenced by NeonDetectionPostProcessValidate().

◆ MakeOptimizations()

Optimizer::Optimizations armnn::MakeOptimizations ( Args &&...  args)

Definition at line 43 of file Optimizer.hpp.

44 {
45  Optimizer::Optimizations optimizations;
46 
47  Append(optimizations, std::forward<Args>(args)...);
48 
49  return optimizations;
50 }

References Append().

Referenced by ApplyBackendOptimizations(), and Optimize().
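
A hedged sketch following the pattern used inside Optimize(): bundle several named optimizations and run them over a graph. 'graph' is an assumed armnn::Graph&, and Optimizer plus the armnn::optimizations namespace are internal to the source tree:

    using namespace armnn::optimizations;
    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(SquashEqualPermuteSiblings(),
                                                           SquashEqualReshapeSiblings(),
                                                           OptimizeInversePermutes(),
                                                           OptimizeConsecutiveReshapes()));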

◆ MakeOptional()

Optional<T> armnn::MakeOptional ( Args &&...  args)

Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object.

Definition at line 305 of file Optional.hpp.

306 {
307  return Optional<T>(CONSTRUCT_IN_PLACE, std::forward<Args>(args)...);
308 }

References CONSTRUCT_IN_PLACE.
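
A short example with the public armnn/Optional.hpp and armnn/Tensor.hpp headers:

    // Constructs the TensorInfo directly inside the Optional, with no temporary copy.
    armnn::Optional<armnn::TensorInfo> biases =
        armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({16}), armnn::DataType::Float32);

    if (biases.has_value())
    {
        unsigned int count = biases.value().GetNumElements();   // 16
    }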

◆ MakeTransformIterator()

constexpr TransformIterator<Function, Iterator> armnn::MakeTransformIterator ( Iterator  i,
Function  f 
)
constexpr

Definition at line 90 of file TransformIterator.hpp.

91 {
92  return TransformIterator<Function, Iterator>(i, f);
93 }

◆ MirrorPad()

void MirrorPad ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const ITensorHandle inputHandle,
ITensorHandle outputHandle,
const PadQueueDescriptor data 
)

Definition at line 59 of file MirrorPad.cpp.

64 {
65  auto padList = data.m_Parameters.m_PadList;
66  PaddingMode paddingMode = data.m_Parameters.m_PaddingMode;
67 
68  TensorShape outputShape = outputInfo.GetShape();
69  TensorShape inputShape = inputInfo.GetShape();
70 
71  unsigned int numOutputElements = outputInfo.GetNumElements();
72  unsigned int numInputDimensions = inputShape.GetNumDimensions();
73  assert(numInputDimensions == outputShape.GetNumDimensions());
74 
75  // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1.
76  // If padding mode is Symmetric then both paddings must be no greater than inputShape(i).
77  const unsigned int isReflect = static_cast<unsigned int>(paddingMode == PaddingMode::Reflect);
78  for(unsigned int i = 0; i < padList.size(); ++i)
79  {
80  if(padList.at(i).first > (inputShape[i] - isReflect) ||
81  padList.at(i).second > (inputShape[i] - isReflect))
82  {
83  throw armnn::InvalidArgumentException("Paddings must be less (Reflect) or "
84  "equal (Symmetric) to the dimension size.");
85  }
86  }
87 
88  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
89  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
90 
91  Decoder<float>& input = *inputData;
92  Encoder<float>& output = *outData;
93 
94  for(unsigned int idx = 0; idx < numOutputElements; ++idx)
95  {
96  // Get the coordinates of the current index in vector form. E.g inx 1 = [0, 0, 0, 1 ]
97  const std::vector<unsigned int> coord = IndexToCoord(outputShape, idx);
98 
99  std::vector<unsigned int> dimensions;
100  std::vector<unsigned int> coords;
101 
102  for(unsigned int i = 0; i < numInputDimensions; ++i)
103  {
104  dimensions.emplace_back(i);
105  coords.emplace_back(coord[i]);
106  }
107 
108  auto isInPadding = [&](unsigned int i)
109  {
110  return (coords[i] < padList[i].first || coords[i] > inputShape[i] + padList[i].first - 1);
111  };
112 
113  auto getReflectIndex = [&](unsigned int i) -> unsigned int
114  {
115  if(isInPadding(i))
116  {
117  if(coords[i] < padList[i].first)
118  {
119  return padList[i].first - coords[i];
120  }
121  else
122  {
123  return 2 * inputShape[i] + padList[i].first - 2 - coords[i];
124  }
125  }
126  return coords[i] - padList[i].first;
127  };
128 
129  auto getSymmetricIndex = [&](unsigned int i) -> unsigned int
130  {
131  if(isInPadding(i))
132  {
133  if(coords[i] < padList[i].first)
134  {
135  return padList[i].first - coords[i] - 1;
136  }
137  else
138  {
139  return 2 * inputShape[i] + padList[i].first - 1 - coords[i];
140  }
141  }
142  return coords[i] - padList[i].first;
143  };
144 
145  // Location of the value in the input tensor to use in the output.
146  std::vector<unsigned int> coordOfInput;
147 
148  // any_of works as a loop here to check if any of the dimensions are in the padding.
149  // If dimensions is in the padding area, then create the coordinates of the location in the
150  // input tensor to use in the output.
151  // E.g.
152  // Input tensor = [ 1, 2, 3 ], Rank = 1.
153  // Output tensor = [ 2, 1, 2, 3, 1 ] if Reflect or [ 1, 1, 2, 3, 3 ] if Symmetric with a padding of (1, 1).
154  // So it will either return [ 1 ] or [ 0 ] which is used to set the first value in the output tensor and so on.
155  if(std::any_of(dimensions.begin(), dimensions.end(), isInPadding))
156  {
157  switch(paddingMode)
158  {
159  case PaddingMode::Reflect:
160  {
161  for(unsigned int i = 0; i < numInputDimensions; ++i)
162  {
163  coordOfInput.emplace_back(getReflectIndex(i));
164  }
165  break;
166  }
167  case PaddingMode::Symmetric:
168  {
169  for(unsigned int i = 0; i < numInputDimensions; ++i)
170  {
171  coordOfInput.emplace_back(getSymmetricIndex(i));
172  }
173  break;
174  }
175  default:
176  throw InvalidArgumentException("Padding mode not supported.");
177  break;
178  }
179  }
180  else
181  {
182  for(unsigned int i = 0; i < numInputDimensions; ++i)
183  {
184  coordOfInput.emplace_back(coord[i] - padList[i].first);
185  }
186  }
187 
188  // Set output value using the coordinate of the input value to use.
189  const unsigned int indexOfInput = CoordToIndex(inputShape, coordOfInput);
190 
191  input[indexOfInput];
192  auto inputValue = input.Get();
193 
194  output[idx];
195  output.Set(inputValue);
196  }
197 }

References Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PaddingMode, PadDescriptor::m_PadList, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), Reflect, Encoder< IType >::Set(), and Symmetric.

◆ MockTensorHandleFactoryId()

constexpr const char* armnn::MockTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file MockTensorHandleFactory.hpp.

15 {
16  return "Arm/Mock/TensorHandleFactory";
17 }

Referenced by MockTensorHandleFactory::GetIdStatic().

◆ NeonAbsWorkloadValidate()

arm_compute::Status NeonAbsWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonAbsWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEAbsLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonActivationWorkloadValidate()

arm_compute::Status NeonActivationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor 
)

Definition at line 17 of file NeonActivationWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
26 
27  return arm_compute::NEActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }

Referenced by NeonLayerSupport::IsActivationSupported().

◆ NeonAdditionWorkloadValidate()

arm_compute::Status NeonAdditionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 20 of file NeonAdditionWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
26  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
27  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
28 
29  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
30  activationDescriptor);
31 
32  return arm_compute::NEArithmeticAddition::validate(&aclInput0,
33  &aclInput1,
34  &aclOutput,
35  arm_compute::ConvertPolicy::SATURATE,
36  activationInfo);
37 }

Referenced by NeonLayerSupport::IsAdditionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().
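
A hedged usage sketch (the Neon backend headers are internal; the final parameter is the optional fused ActivationDescriptor pointer, nullptr when no activation is fused):

    armnn::TensorInfo tensorInfo({1, 8}, armnn::DataType::Float32);
    arm_compute::Status status =
        armnn::NeonAdditionWorkloadValidate(tensorInfo, tensorInfo, tensorInfo, nullptr);
    bool supported = (status.error_code() == arm_compute::ErrorCode::OK);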

◆ NeonArgMinMaxWorkloadValidate()

arm_compute::Status NeonArgMinMaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ArgMinMaxDescriptor descriptor 
)

Definition at line 31 of file NeonArgMinMaxWorkload.cpp.

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }

Referenced by NeonLayerSupport::IsArgMinMaxSupported().

◆ NeonBackendId()

constexpr const char* armnn::NeonBackendId ( )
constexpr

Definition at line 10 of file NeonBackendId.hpp.

10 { return "CpuAcc"; }

Referenced by NeonBackend::GetIdStatic().
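
The returned string is the BackendId clients use to request the Neon (CpuAcc) backend, for example at optimize time. A sketch, assuming 'network' is an INetworkPtr and 'runtime' an IRuntimePtr:

    std::vector<armnn::BackendId> backends = { armnn::BackendId("CpuAcc") };
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, backends, runtime->GetDeviceSpec());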

◆ NeonBatchMatMulValidate()

arm_compute::Status NeonBatchMatMulValidate ( const TensorInfo inputInfoX,
const TensorInfo inputInfoY,
const TensorInfo outputInfo,
const BatchMatMulDescriptor descriptor,
const bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 19 of file NeonBatchMatMulWorkload.cpp.

25 {
26  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
27  {
28  throw Exception("Support for adjoint not implemented.");
29  }
30  if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
31  {
32  throw Exception("Only supported the MatMul in the last 2 dimensions");
33  }
34 
35  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
36  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
37  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
38 
39  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
40  aclInputInfoX.set_are_values_constant(false);
41  aclInputInfoY.set_are_values_constant(false);
42 
43  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
44  activationDescriptor);
45 
46  arm_compute::MatMulInfo matMulInfo;
47  matMulInfo.adj_lhs(descriptor.m_TransposeX);
48  matMulInfo.adj_rhs(descriptor.m_TransposeY);
49 
50  arm_compute::CpuMatMulSettings settings;
51  settings.fast_math(isFastMathEnabled);
52 
53  return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
54  activationInfo);
55 }

References BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, and NCHW.

Referenced by NeonLayerSupport::IsBatchMatMulSupported().

◆ NeonBatchNormalizationValidate()

arm_compute::Status NeonBatchNormalizationValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonBatchNormalizationWorkload.cpp.

32 {
33  const arm_compute::TensorInfo aclInputInfo =
34  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
35  const arm_compute::TensorInfo aclOutputInfo =
36  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclMeanInfo =
38  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
39  const arm_compute::TensorInfo aclVarInfo =
40  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclBetaInfo =
42  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclGammaInfo =
44  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
45 
46  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
47  activationDescriptor);
48 
49  return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
50  &aclOutputInfo,
51  &aclMeanInfo,
52  &aclVarInfo,
53  &aclBetaInfo,
54  &aclGammaInfo,
55  descriptor.m_Eps,
56  activationInfo);
57 }

Referenced by NeonLayerSupport::IsBatchNormalizationSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonBatchToSpaceNdWorkloadValidate()

arm_compute::Status NeonBatchToSpaceNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor 
)

Definition at line 15 of file NeonBatchToSpaceNdWorkload.cpp.

18 {
19  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21 
22  arm_compute::Status statusBatchToSpace = arm_compute::Status(arm_compute::ErrorCode::OK);
23  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
25 
26  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
27  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
28 
29  // When a spacial dimension is missing (rank=3) set W to 1
30  const unsigned int rank = input.GetNumDimensions();
31  if (rank == 3)
32  {
33  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
34  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
35 
36  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
37  {
38  // In ACL dimensions are right to left: C, W, H, N
39  aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
40  aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
41  }
42  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
43  {
44  // In ACL dimensions are right to left: W, H, C, N
45  aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
46  aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
47  }
48  else
49  {
50  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
51  }
52 
53  statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
54  statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
55  }
56 
57  // ArmNN blockShape is [H, W] ACl asks for W, H
58  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
59  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
60 
61  const arm_compute::CropInfo cropInfo = BuildArmComputeCropInfo(descriptor, rank);
62 
63  statusBatchToSpace = arm_compute::NEBatchToSpaceLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
64  blockWidth,
65  blockHeight,
66  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo,
67  cropInfo);
68 
69  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
70  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
71  statusBatchToSpace.error_code() == arm_compute::ErrorCode::OK)
72  {
73  return arm_compute::Status(arm_compute::ErrorCode::OK,
74  "All BatchToSpace layers validate status OK.");
75  }
76  else
77  {
78  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
79  "BatchToSpace layer validate status failed."
80  + statusBatchToSpace.error_description()
81  + statusReshapeInput.error_description()
82  + statusReshapeOutput.error_description());
83  }
84 }

Referenced by NeonLayerSupport::IsBatchToSpaceNdSupported().

◆ NeonCastValidate()

arm_compute::Status NeonCastValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonCastWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
25 }

Referenced by NeonLayerSupport::IsCastSupported().

◆ NeonChannelShuffleValidate()

arm_compute::Status NeonChannelShuffleValidate ( const TensorInfo input,
const TensorInfo output,
const ChannelShuffleDescriptor descriptor 
)

Definition at line 17 of file NeonChannelShuffleWorkload.cpp.

20 {
21  arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  // In Arm NN and in NNAPI, the channel shuffle implementation is data-layout agnostic and takes an axis parameter.
25  // The Neon channel shuffle implementation depends on the data layout and does not take an axis parameter;
26  // it only supports channel shuffle for 4D tensors with the channel dimension at index 1 or 3.
27  arm_compute::DataLayout aclDataLayout;
28  if (input.GetNumDimensions() == 4)
29  {
30  switch (descriptor.m_Axis)
31  {
32  case 1:
33  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
34  break;
35  case 3:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
37  break;
38  default:
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
40  }
41  aclInputInfo.set_data_layout(aclDataLayout);
42  aclOutputInfo.set_data_layout(aclDataLayout);
43  return arm_compute::NEChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
44  }
45  else
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
48  }
49 }

Referenced by NeonLayerSupport::IsChannelShuffleSupported().
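The axis-to-data-layout mapping above can be exercised with a sketch like the following; the tensor shape and group count are assumptions, and only axis values of 1 (NCHW) or 3 (NHWC) on 4D tensors will validate.

    // Sketch only: 4D NCHW tensor, channel axis 1, 32 channels shuffled in 4 groups.
    const armnn::TensorInfo input ({ 1, 32, 16, 16 }, armnn::DataType::Float32);
    const armnn::TensorInfo output({ 1, 32, 16, 16 }, armnn::DataType::Float32);

    armnn::ChannelShuffleDescriptor descriptor;
    descriptor.m_NumGroups = 4;   // 32 channels -> 4 groups of 8
    descriptor.m_Axis      = 1;   // 1 maps to NCHW above; 3 would map to NHWC; anything else fails

    const arm_compute::Status status = armnn::NeonChannelShuffleValidate(input, output, descriptor);
    // status.error_code() is arm_compute::ErrorCode::OK only when the switch above accepted the axis.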

◆ NeonComparisonWorkloadValidate()

arm_compute::Status NeonComparisonWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ComparisonDescriptor descriptor 
)

Definition at line 16 of file NeonComparisonWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput0 = BuildArmComputeTensorInfo(input0);
22  const arm_compute::TensorInfo aclInput1 = BuildArmComputeTensorInfo(input1);
23  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
26 
27  const arm_compute::Status aclStatus = arm_compute::NEElementwiseComparison::validate(&aclInput0,
28  &aclInput1,
29  &aclOutput,
30  comparisonOperation);
31  return aclStatus;
32 }

Referenced by NeonLayerSupport::IsComparisonSupported().

◆ NeonConcatWorkloadValidate()

arm_compute::Status NeonConcatWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor 
)

Definition at line 27 of file NeonConcatWorkload.cpp.

31 {
32  std::vector<arm_compute::TensorInfo> aclInputs;
33  for (const TensorInfo* input : inputs)
34  {
35  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
36  aclInputs.emplace_back(aclInputInfo);
37  }
38  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
39  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
40  for (arm_compute::ITensorInfo& input : aclInputs)
41  {
42  aclInputPtrs.emplace_back(&input);
43  }
44 
45  size_t aclAxis = CalcAxis(descriptor);
46  return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
47 }

Referenced by NeonLayerSupport::IsConcatSupported().

◆ NeonConstantWorkloadValidate()

arm_compute::Status NeonConstantWorkloadValidate ( const TensorInfo output)

Definition at line 20 of file NeonConstantWorkload.cpp.

21 {
22  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  std::array<arm_compute::DataType,9> supportedTypes = {
25  arm_compute::DataType::BFLOAT16,
26  arm_compute::DataType::F16,
27  arm_compute::DataType::F32,
28  arm_compute::DataType::QASYMM8,
29  arm_compute::DataType::QASYMM8_SIGNED,
30  arm_compute::DataType::QSYMM16,
31  arm_compute::DataType::QSYMM8,
32  arm_compute::DataType::QSYMM8_PER_CHANNEL,
33  arm_compute::DataType::S32
34  };
35  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
36 
37  if (it != end(supportedTypes))
38  {
39  return arm_compute::Status{};
40  }
41  else
42  {
43  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
44  }
45 }

Referenced by NeonLayerSupport::IsConstantSupported().
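Because the check above is purely a data-type lookup, a caller can probe support with nothing but an output TensorInfo; the shape below is an arbitrary assumption.

    // Sketch only: Float32 maps to arm_compute::DataType::F32, which is in the supportedTypes array above.
    const armnn::TensorInfo constantOutput({ 10 }, armnn::DataType::Float32);

    const arm_compute::Status status = armnn::NeonConstantWorkloadValidate(constantOutput);
    const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);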

◆ NeonConvertFp16ToFp32WorkloadValidate()

arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonConvertFp16ToFp32Workload.cpp.

20 {
21  // Fall back to a portable software implementation if Compute Library's NECast won't work, so
22  // this method always returns success
23 
24  armnn::IgnoreUnused(input);
25  armnn::IgnoreUnused(output);
26  return arm_compute::Status();
27 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp16ToFp32Supported().

◆ NeonConvertFp32ToFp16WorkloadValidate()

arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 21 of file NeonConvertFp32ToFp16Workload.cpp.

22 {
23  // Fall back to a portable software implementation if Compute Library's NECast won't work, so
24  // this method always returns success
25 
26  armnn::IgnoreUnused(input);
27  armnn::IgnoreUnused(output);
28  return arm_compute::Status();
29 }

References IgnoreUnused().

Referenced by NeonLayerSupport::IsConvertFp32ToFp16Supported().

◆ NeonConvolution2dWorkloadValidate()

arm_compute::Status NeonConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution2dWorkload.cpp.

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
36 
37  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
38  descriptor.m_DilationY);
39 
40  arm_compute::TensorInfo aclBiasesInfo;
41  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
42 
43  if (descriptor.m_BiasEnabled)
44  {
45  if (!biases.has_value())
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
48  "ArmNN NeonConvolution2dWorkload has empty bias value."};
49  }
50  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
51  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
52  optionalAclBiasesInfo = &aclBiasesInfo;
53  }
54 
55  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
56 
57  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
58  activationDescriptor);
59 
60  return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
61  &aclWeightsInfo,
62  optionalAclBiasesInfo,
63  &aclOutputInfo,
64  layerInfo,
65  arm_compute::WeightsInfo(),
66  aclDilationInfo,
67  activationInfo,
68  isFastMathEnabled);
69 }

Referenced by NeonLayerSupport::IsConvolution2dSupported(), and NeonBackend::OptimizeSubgraphView().
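A pre-validation sketch for a padded 3x3 NHWC convolution follows. All shapes and descriptor values are assumptions, the weights are given in the [O, H, W, I] order Arm NN uses for NHWC, and the trailing activation-descriptor argument is assumed to be an optional pointer that may be null when no fused activation is requested.

    // Sketch only: 16 -> 32 channels, 3x3 kernel, stride 1, padding 1, bias enabled.
    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_PadLeft     = 1;
    descriptor.m_PadRight    = 1;
    descriptor.m_PadTop      = 1;
    descriptor.m_PadBottom   = 1;
    descriptor.m_BiasEnabled = true;

    const armnn::TensorInfo input ({ 1, 8, 8, 16 }, armnn::DataType::Float32);
    const armnn::TensorInfo output({ 1, 8, 8, 32 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 32, 3, 3, 16 }, armnn::DataType::Float32);
    weights.SetConstant(true);                      // mirrors set_are_values_constant() above
    armnn::TensorInfo bias({ 32 }, armnn::DataType::Float32);
    bias.SetConstant(true);

    const arm_compute::Status status = armnn::NeonConvolution2dWorkloadValidate(
        input, output, descriptor, weights,
        armnn::Optional<armnn::TensorInfo>(bias),
        /*isFastMathEnabled=*/false,
        /*activationDescriptor=*/nullptr);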

◆ NeonConvolution3dWorkloadValidate()

arm_compute::Status NeonConvolution3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution3dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution3dWorkload.cpp.

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  if (!biases.has_value())
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
41  "ArmNN NeonConvolution3dWorkload has empty bias value."};
42  }
43 
44  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
45  optionalAclBiasesInfo = &aclBiasesInfo;
46  }
47  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
48 
49  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
50  isFastMathEnabled,
51  activationDescriptor);
52 
53  return arm_compute::NEConv3D::validate(&aclInputInfo,
54  &aclWeightsInfo,
55  optionalAclBiasesInfo,
56  &aclOutputInfo,
57  aclConv3DInfo);
58 }

Referenced by NeonLayerSupport::IsConvolution3dSupported().

◆ NeonDepthToSpaceWorkloadValidate()

arm_compute::Status NeonDepthToSpaceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthToSpaceDescriptor descriptor 
)

Definition at line 19 of file NeonDepthToSpaceWorkload.cpp.

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsDepthToSpaceSupported().

◆ NeonDepthwiseConvolutionWorkloadValidate()

arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor activationDescriptor 
)

Definition at line 29 of file NeonDepthwiseConvolutionWorkload.cpp.

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38 
39  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40  //
41  // ACL format for weights for depthwise is:
42  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43  // - [1, C, H, W] for [N, C, H, W] input/output layout
44  //
45  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47  // so we do the permute here for the TensorInfo weights.
48  unsigned int aclDepthMultiplier;
49  TensorInfo weightsPermuted;
50  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
51 
52  // Convert the weights into the compute library format
53  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
54  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
55 
56  arm_compute::TensorInfo aclBiasesInfo;
57  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
58  if (descriptor.m_BiasEnabled)
59  {
60  if(!biases.has_value())
61  {
62  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
63  "ArmNN NeonDepthwiseConvolutionWorkload has empty bias value."};
64  }
65  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
66  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
67  optionalAclBiasesInfo = &aclBiasesInfo;
68  }
69 
70  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
71  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
72  descriptor.m_DilationY);
73 
74  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
75  activationDescriptor);
76 
77  return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
78  &aclWeightsInfo,
79  optionalAclBiasesInfo,
80  &aclOutputInfo,
81  aclPadStrideInfo,
82  aclDepthMultiplier,
83  activationInfo,
84  aclDilationInfo);
85 }

Referenced by NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and NeonBackend::OptimizeSubgraphView().
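The weight-layout comment above is the part that most often trips callers up, so here is a bias-free sketch. The shapes are assumptions, the weights follow the [1, H, W, C] layout described in the listing (depth multiplier 1), and the activation-descriptor argument is again assumed to be a nullable pointer.

    // Sketch only: 3x3 depthwise over 16 channels, NHWC, no bias, no fused activation.
    armnn::DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_BiasEnabled = false;

    const armnn::TensorInfo input ({ 1, 8, 8, 16 }, armnn::DataType::Float32);
    const armnn::TensorInfo output({ 1, 6, 6, 16 }, armnn::DataType::Float32);   // 8 - 3 + 1 = 6 per spatial dim
    armnn::TensorInfo weights({ 1, 3, 3, 16 }, armnn::DataType::Float32);        // [1, H, W, C] as documented above
    weights.SetConstant(true);

    const arm_compute::Status status = armnn::NeonDepthwiseConvolutionWorkloadValidate(
        input, output, descriptor, weights,
        armnn::EmptyOptional(),               // biases not supplied because m_BiasEnabled is false
        /*activationDescriptor=*/nullptr);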

◆ NeonDequantizeWorkloadValidate()

arm_compute::Status NeonDequantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file NeonDequantizeWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::NEDequantizationLayer::validate(&aclInput, &aclOutput);
29 }

Referenced by NeonLayerSupport::IsDequantizeSupported().
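A short sketch of a typical call; the quantization scale and offset are placeholder assumptions.

    // Sketch only: dequantize a QAsymmU8 tensor back to Float32.
    const armnn::TensorInfo input ({ 1, 16 }, armnn::DataType::QAsymmU8, 0.02f, 128);
    const armnn::TensorInfo output({ 1, 16 }, armnn::DataType::Float32);

    const arm_compute::Status status = armnn::NeonDequantizeWorkloadValidate(input, output);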

◆ NeonDetected()

bool NeonDetected ( )

Definition at line 37 of file Utils.cpp.

38 {
39 #if !defined(ARMNN_BUILD_BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
40  auto hwcaps= getauxval(AT_HWCAP);
41 #endif
42 
43 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__aarch64__)
44 
45  if (hwcaps & HWCAP_ASIMD)
46  {
47  // On an arm64 device with Neon.
48  return true;
49  }
50  else
51  {
52  // On an arm64 device without Neon.
53  return false;
54  }
55 
56 #endif
57 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__arm__)
58 
59  if (hwcaps & HWCAP_NEON)
60  {
61  // On an armhf device with Neon.
62  return true;
63  }
64  else
65  {
66  // On an armhf device without Neon.
67  return false;
68  }
69 
70 #endif
71 
72  // This method of Neon detection is only supported on Linux, so to prevent a false negative
73  // we return true in cases where detection did not run.
74  return true;
75 }
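A typical use is a coarse capability gate before preferring Neon-accelerated paths; the backend choice in the sketch is an illustrative assumption, not something this function does itself.

    // Sketch only: gate Neon-specific setup on the runtime check above.
    if (armnn::NeonDetected())
    {
        // Prefer the Neon-accelerated (CpuAcc) path.
    }
    else
    {
        // Fall back to a reference / non-Neon path.
    }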

◆ NeonDetectionPostProcessValidate()

arm_compute::Status NeonDetectionPostProcessValidate ( const TensorInfo boxEncodings,
const TensorInfo scores,
const TensorInfo anchors,
const TensorInfo detectionBoxes,
const TensorInfo detectionClasses,
const TensorInfo detectionScores,
const TensorInfo numDetections,
const DetectionPostProcessDescriptor descriptor 
)

Definition at line 32 of file NeonDetectionPostProcessWorkload.cpp.

40 {
41  arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
42 
43  const arm_compute::TensorInfo aclBoxEncodings =
44  armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
45 
46  const arm_compute::TensorInfo aclScores =
47  armcomputetensorutils::BuildArmComputeTensorInfo(scores);
48 
49  const arm_compute::TensorInfo aclAnchors =
50  armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
51 
52  arm_compute::TensorInfo aclDetectionBoxes =
53  armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
54 
55  arm_compute::TensorInfo aclDetectionClasses =
56  armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
57 
58  arm_compute::TensorInfo aclDetectionScores =
59  armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
60 
61  arm_compute::TensorInfo aclNumDetections =
62  armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
63 
64  return arm_compute::NEDetectionPostProcessLayer::validate(
65  &aclBoxEncodings,
66  &aclScores,
67  &aclAnchors,
68  &aclDetectionBoxes,
69  &aclDetectionClasses,
70  &aclDetectionScores,
71  &aclNumDetections,
72  info);
73 }

References info, and MakeInfo().

◆ NeonDivisionWorkloadValidate()

arm_compute::Status NeonDivisionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 18 of file NeonDivisionWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::NEElementwiseDivision::validate(&aclInput0,
31  &aclInput1,
32  &aclOutput,
33  activationInfo);
34 }

Referenced by NeonLayerSupport::IsDivisionSupported(), IsLayerTypeSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonElementwiseBinaryWorkloadValidate()

arm_compute::Status NeonElementwiseBinaryWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ElementwiseBinaryDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 20 of file NeonElementwiseBinaryWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
27  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
28  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
29 
30  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
31  activationDescriptor);
32 
33  switch (descriptor.m_Operation)
34  {
35  case armnn::BinaryOperation::Power:
36  return arm_compute::NEElementwisePower::validate(&aclInput0,
37  &aclInput1,
38  &aclOutput,
39  activationInfo);
40  case armnn::BinaryOperation::SqDiff:
41  return arm_compute::NEElementwiseSquaredDiff::validate(&aclInput0,
42  &aclInput1,
43  &aclOutput,
44  activationInfo);
45  default:
46  throw InvalidArgumentException("Unknown binary operator", CHECK_LOCATION());
47  }
48 }

Referenced by IsLayerTypeSupported().

◆ NeonExpWorkloadValidate()

arm_compute::Status NeonExpWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonExpWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEExpLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonFullyConnectedWorkloadValidate()

arm_compute::Status NeonFullyConnectedWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonFullyConnectedWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
32  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
33  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
34  aclWeights.set_are_values_constant(weights.IsConstant());
35 
36  arm_compute::TensorInfo aclBiases;
37  arm_compute::TensorInfo* optionalAclBiases = nullptr;
38  if (descriptor.m_BiasEnabled)
39  {
40  ARMNN_ASSERT(biases.has_value());
41  aclBiases = BuildArmComputeTensorInfo(biases.value());
42  aclBiases.set_are_values_constant(biases.value().IsConstant());
43  optionalAclBiases = &aclBiases;
44  }
45 
46  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
47  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
48  return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
49  &aclWeights,
50  optionalAclBiases,
51  &aclOutput,
52  fullyConnectedLayerInfo);
53 }

Referenced by NeonLayerSupport::IsFullyConnectedSupported(), and NeonBackend::OptimizeSubgraphView().
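A sketch of a biased fully connected pre-validation follows. The shapes, the transposed [outputs, inputs] weight orientation and the nullable activation-descriptor argument are assumptions made for illustration.

    // Sketch only: 128 inputs -> 10 outputs with bias.
    armnn::FullyConnectedDescriptor descriptor;
    descriptor.m_BiasEnabled           = true;
    descriptor.m_TransposeWeightMatrix = true;   // weights given as [outputs, inputs] below

    const armnn::TensorInfo input ({ 1, 128 }, armnn::DataType::Float32);
    const armnn::TensorInfo output({ 1, 10 },  armnn::DataType::Float32);
    armnn::TensorInfo weights({ 10, 128 }, armnn::DataType::Float32);
    weights.SetConstant(true);
    armnn::TensorInfo bias({ 10 }, armnn::DataType::Float32);
    bias.SetConstant(true);

    const arm_compute::Status status = armnn::NeonFullyConnectedWorkloadValidate(
        input, output, weights,
        armnn::Optional<armnn::TensorInfo>(bias),
        descriptor,
        /*activationDescriptor=*/nullptr);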

◆ NeonFusedWorkloadValidate()

arm_compute::Status NeonFusedWorkloadValidate ( const std::vector< std::reference_wrapper< TensorInfo >> &  inputInfos,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputInfos,
const FusedDescriptor fusedDescriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 22 of file NeonFusedWorkload.cpp.

26 {
27  std::vector<arm_compute::TensorInfo> actInputInfos;
28  actInputInfos.reserve(inputInfos.size());
29  for (size_t i = 0u; i < inputInfos.size(); ++i)
30  {
31  actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
32  }
33 
34  std::vector<arm_compute::TensorInfo> actOutputInfos;
35  actOutputInfos.reserve(outputInfos.size());
36  for (size_t i = 0u; i < outputInfos.size(); ++i)
37  {
38  actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
39  }
40 
41  const arm_compute::ActivationLayerInfo activationInfo =
42  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
43 
44  switch (fusedDescriptor.m_FusedKernelType)
45  {
46  case FusedKernelType::AddMulAdd:
47  return arm_compute::NEAddMulAdd::validate(
48  &actInputInfos[0],
49  &actInputInfos[1],
50  &actInputInfos[2], // bn_mul
51  &actInputInfos[3], // bn_add
52  actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output
53  actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output
54  arm_compute::ConvertPolicy::SATURATE,
55  activationInfo);
56  default:
57  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
58  "NeonFusedWorkloadValidate: no valid kernel type"};
59  }
60 }

Referenced by NeonLayerSupport::IsFusedSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonGatherNdWorkloadValidate()

arm_compute::Status NeonGatherNdWorkloadValidate ( const TensorInfo paramsInfo,
const TensorInfo indicesInfo,
const TensorInfo outputInfo 
)

Validate Mul

Validate ReduceSum

Validate Gather

Validate Reshape

Return OK if all the layers are valid

Definition at line 14 of file NeonGatherNdWorkload.cpp.

17 {
18  // Calculate ND, K, W, C.
19  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
20 
21  /// Validate Mul
22  // Indices with shape { W, ND }
23  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
24  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
25  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
26 
27  // Flattened coefficients with shape { ND }
28  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
29  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
30  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
31 
32  // Output of Mul with shape { W, ND }
33  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
34 
35  auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
36  &aclFlattenedCoeffInfo,
37  &aclOutputMulInfo,
38  1.0f,
39  arm_compute::ConvertPolicy::WRAP,
40  arm_compute::RoundingPolicy::TO_ZERO,
41  arm_compute::ActivationLayerInfo());
42 
43  /// Validate ReduceSum
44  // Flattened indices with shape { W }
45  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
46  flattenedIndices_Info.SetShape({ keyIndices["W"] });
47  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
48 
49  const std::vector<unsigned int> armnnReduceAxes(1, 1);
50  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
51  indices_W_ND_Info.GetNumDimensions(),
52  armnnReduceAxes);
53 
54  auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
55  &aclFlattenedIndicesInfo,
56  static_cast<unsigned int>(coords[0]),
57  arm_compute::ReductionOperation::SUM,
58  false);
59 
60  /// Validate Gather
61  // Params with shape { K, C }
62  armnn::TensorInfo params_K_C_Info = paramsInfo;
63  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
64  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
65 
66  // Output of gather with shape { W, C }
67  armnn::TensorInfo outputGather_Info = outputInfo;
68  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
69  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
70 
71  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
72  auto statusGather =
73  arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
74 
75  /// Validate Reshape
76  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
77 
78  auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
79 
80  /// Return OK if all the layers are valid
81  auto okCode = arm_compute::ErrorCode::OK;
82  if (statusMul.error_code() == okCode &&
83  statusReduceSum.error_code() == okCode &&
84  statusGather.error_code() == okCode &&
85  statusReshape.error_code() == okCode)
86  {
87  return arm_compute::Status(arm_compute::ErrorCode::OK,
88  "All GatherND layers validate status OK.");
89  }
90  else
91  {
92  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
93  "GatherND layer validate status failed.");
94  }
95 }

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by NeonLayerSupport::IsGatherNdSupported().
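As a hedged illustration of the shape bookkeeping above (the concrete numbers are assumptions about CalculateGatherNdKeyIndices, not values taken from this reference): for paramsInfo of shape { 3, 4, 5 } and indicesInfo of shape { 2, 2 } one would expect ND = 2, W = 2, K = 12 and C = 5, giving the intermediate tensors validated above the following shapes.

    // indices_W_ND_Info     : { W, ND } = { 2, 2 }    (input to NEPixelWiseMultiplication)
    // flattenedCoeff_Info   : { ND }    = { 2 }       (per-dimension flattening coefficients)
    // flattenedIndices_Info : { W }     = { 2 }       (NEReductionOperation SUM over axis 1)
    // params_K_C_Info       : { K, C }  = { 12, 5 }   (params flattened to 2D for NEGather)
    // outputGather_Info     : { W, C }  = { 2, 5 }    (gather result, then NEReshapeLayer to outputInfo)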

◆ NeonGatherWorkloadValidate()

arm_compute::Status NeonGatherWorkloadValidate ( const TensorInfo input,
const TensorInfo indices,
const TensorInfo output,
const GatherDescriptor descriptor 
)

Definition at line 13 of file NeonGatherWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
23 
24  return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
25 }

Referenced by NeonLayerSupport::IsGatherSupported().

◆ NeonInstanceNormalizationWorkloadValidate()

arm_compute::Status NeonInstanceNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const InstanceNormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonInstanceNormalizationWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  return arm_compute::NEInstanceNormalizationLayer::validate(&aclInputInfo,
27  &aclOutputInfo,
28  descriptor.m_Gamma,
29  descriptor.m_Beta,
30  descriptor.m_Eps);
31 }

Referenced by NeonLayerSupport::IsInstanceNormalizationSupported().

◆ NeonL2NormalizationWorkloadValidate()

arm_compute::Status NeonL2NormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonL2NormalizationFloatWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
27 
28  return arm_compute::NEL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
29 }

Referenced by NeonLayerSupport::IsL2NormalizationSupported().

◆ NeonLogicalAndWorkloadValidate()

arm_compute::Status NeonLogicalAndWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalAndWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalAnd::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogicalNotWorkloadValidate()

arm_compute::Status NeonLogicalNotWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonLogicalNotWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::Status aclStatus = arm_compute::NELogicalNot::validate(&aclInputInfo,
26  &aclOutputInfo);
27  return aclStatus;
28 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLogicalOrWorkloadValidate()

arm_compute::Status NeonLogicalOrWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalOrWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalOr::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

◆ NeonLogSoftmaxWorkloadValidate()

arm_compute::Status NeonLogSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 19 of file NeonLogSoftmaxWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NELogSoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }

Referenced by NeonLayerSupport::IsLogSoftmaxSupported().

◆ NeonLogWorkloadValidate()

arm_compute::Status NeonLogWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonLogWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NELogLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonLstmFloatWorkloadValidate()

arm_compute::Status NeonLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 253 of file NeonLstmFloatWorkload.cpp.

262 {
263  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
264 
265  // The inputs and outputs
266  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
267  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
268  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
269  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
270  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
271  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
272  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
273 
274  // Basic parameters
275  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
276  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
277  const arm_compute::TensorInfo aclInputToCellWeightsInfo
278  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
279  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
280  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
281  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
282  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
283  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
284  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
285  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
286  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
287  const arm_compute::TensorInfo aclForgetGateBiasInfo
288  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
289  const arm_compute::TensorInfo aclCellBiasInfo
290  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
291  const arm_compute::TensorInfo aclOutputGateBiasInfo
292  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
293 
294  arm_compute::TensorInfo aclInputToInputWeightsInfo;
295  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
296  arm_compute::TensorInfo aclCellToInputWeightsInfo;
297  arm_compute::TensorInfo aclInputGateBiasInfo;
298  arm_compute::TensorInfo aclProjectionWeightsInfo;
299  arm_compute::TensorInfo aclProjectionBiasInfo;
300  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
301  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
302 
303  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
304  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
305  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
306  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
307 
308 
309  if (!descriptor.m_CifgEnabled)
310  {
311  if (descriptor.m_PeepholeEnabled)
312  {
313  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
314  }
315  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
316  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
317  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
318 
319  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
320  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
321  &aclInputGateBiasInfo);
322  }
323 
324  if (descriptor.m_ProjectionEnabled)
325  {
326  if (paramsInfo.m_ProjectionBias != nullptr)
327  {
328  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
329  }
330  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
331 
332  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ?
334  &aclProjectionBiasInfo : nullptr);
335  }
336 
337  if (descriptor.m_PeepholeEnabled)
338  {
339  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
340  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
341 
342  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
343  }
344 
345  if (descriptor.m_LayerNormEnabled)
346  {
347  if (!descriptor.m_CifgEnabled)
348  {
349  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
350  }
351  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
352  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
353  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
354 
355  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
356  nullptr : &aclInputLayerNormWeightsInfo,
357  &aclForgetLayerNormWeightsInfo,
358  &aclCellLayerNormWeightsInfo,
359  &aclOutputLayerNormWeightsInfo);
360  }
361 
362  float cell_threshold = descriptor.m_ClippingThresCell;
363  float projection_threshold = descriptor.m_ClippingThresProj;
364 
365  // To prepare the ActivationLayerInfo object, five activation-function cases need to be considered
366  arm_compute::ActivationLayerInfo activationLayerInfo =
367  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
368 
369  return arm_compute::NELSTMLayer::validate(&aclInputInfo,
370  &aclInputToForgetWeightsInfo,
371  &aclInputToCellWeightsInfo,
372  &aclInputToOutputWeightsInfo,
373  &aclRecurrentToForgetWeightsInfo,
374  &aclRecurrentToCellWeightsInfo,
375  &aclRecurrentToOutputWeightsInfo,
376  &aclForgetGateBiasInfo,
377  &aclCellBiasInfo,
378  &aclOutputGateBiasInfo,
379  &aclOutputStateInInfo,
380  &aclCellStateInInfo,
381  &aclScratchBufferInfo,
382  &aclOutputStateOutInfo,
383  &aclCellStateOutInfo,
384  &aclOutputInfo,
385  lstm_params_info,
386  activationLayerInfo,
387  cell_threshold,
388  projection_threshold);
389 }

Referenced by NeonLayerSupport::IsLstmSupported().

◆ NeonMaximumWorkloadValidate()

arm_compute::Status NeonMaximumWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 14 of file NeonMaximumWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
19  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEElementwiseMax::validate(&aclInput0,
23  &aclInput1,
24  &aclOutput);
25 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMaximumSupported().

◆ NeonMeanWorkloadValidate()

arm_compute::Status NeonMeanWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const MeanDescriptor descriptor 
)

Definition at line 18 of file NeonMeanWorkload.cpp.

21 {
22  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
26  input.GetNumDimensions(),
27  descriptor.m_Axis);
28 
29  return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
30 }

Referenced by NeonLayerSupport::IsMeanSupported().

◆ NeonMinimumWorkloadValidate()

arm_compute::Status NeonMinimumWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Validate function for validating the inputs and output.

Parameters
[in]  input0  The input0 value to be validated.
[in]  input1  The input1 value to be validated.
[in]  output  The output value to be validated.

Definition at line 15 of file NeonMinimumWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
20  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEElementwiseMin::validate(&aclInput0,
24  &aclInput1,
25  &aclOutput);
26 }

Referenced by IsLayerTypeSupported(), and NeonLayerSupport::IsMinimumSupported().

◆ NeonMultiplicationWorkloadValidate()

arm_compute::Status NeonMultiplicationWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 19 of file NeonMultiplicationWorkload.cpp.

23 {
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
29  arm_compute::ConvertPolicy::SATURATE :
30  arm_compute::ConvertPolicy::WRAP;
31 
32  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
33  activationDescriptor);
34 
35  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
36  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
37  // ignored for F32 tensors.
38  return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
39  &aclInput2,
40  &aclOutput,
41  1.0f,
42  convertPolicy,
43  arm_compute::RoundingPolicy::TO_ZERO,
44  activationInfo);
45 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsMultiplicationSupported(), and NeonBackend::OptimizeSubgraphView().
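The convert-policy selection above can be seen in a sketch like this one; the quantization parameters are placeholder assumptions, and the activation-descriptor argument is assumed to be a nullable pointer.

    // Sketch only: with at least one quantized input, the validate call above uses ConvertPolicy::SATURATE;
    // with float-only inputs it uses ConvertPolicy::WRAP.
    const armnn::TensorInfo quantA  ({ 1, 4 }, armnn::DataType::QAsymmU8, 0.05f, 0);
    const armnn::TensorInfo quantB  ({ 1, 4 }, armnn::DataType::QAsymmU8, 0.05f, 0);
    const armnn::TensorInfo quantOut({ 1, 4 }, armnn::DataType::QAsymmU8, 0.10f, 0);

    const arm_compute::Status status =
        armnn::NeonMultiplicationWorkloadValidate(quantA, quantB, quantOut, /*activationDescriptor=*/nullptr);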

◆ NeonNegWorkloadValidate()

arm_compute::Status NeonNegWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonNegWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NENegLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonNormalizationWorkloadValidate()

arm_compute::Status NeonNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const NormalizationDescriptor descriptor 
)

Definition at line 49 of file NeonNormalizationFloatWorkload.cpp.

52 {
53  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
54  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
55 
56  arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
57 
58  return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
59 }

Referenced by NeonLayerSupport::IsNormalizationSupported().

◆ NeonPadWorkloadValidate()

arm_compute::Status NeonPadWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const PadDescriptor descriptor 
)

Definition at line 59 of file NeonPadWorkload.cpp.

62 {
63  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
64  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
65 
66  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
67 
68  std::reverse_copy(std::begin(descriptor.m_PadList),
69  std::end(descriptor.m_PadList),
70  std::begin(reversed_PadList));
71 
72  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
73 
74  // PixelValue is currently unused when validating, but it is required in order to pass in the PaddingMode.
75  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
76  return arm_compute::NEPadLayer::validate(&aclInputInfo,
77  &aclOutputInfo,
78  padList,
79  pixelValue,
80  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
81 }

Referenced by NeonLayerSupport::IsPadSupported().
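The reverse_copy above exists because Arm NN's m_PadList is ordered from the outermost dimension inwards, while ACL's PaddingList follows its right-to-left dimension convention. A small worked example (the values are assumptions):

    // Sketch only: NCHW padding of 1 on H and 2 on W, none on N and C.
    armnn::PadDescriptor descriptor;
    descriptor.m_PadList = { { 0, 0 }, { 0, 0 }, { 1, 1 }, { 2, 2 } };   // N, C, H, W

    // After the reverse_copy in the listing, ACL receives the reversed list:
    // { {2, 2}, {1, 1}, {0, 0}, {0, 0} }  -> W, H, C, N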

◆ NeonPermuteWorkloadValidate()

arm_compute::Status NeonPermuteWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const PermuteDescriptor descriptor 
)

Definition at line 15 of file NeonPermuteWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
25 }

Referenced by NeonLayerSupport::IsPermuteSupported().

◆ NeonPooling2dWorkloadValidate()

arm_compute::Status NeonPooling2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Pooling2dDescriptor descriptor 
)

Definition at line 22 of file NeonPooling2dWorkload.cpp.

25 {
26  const arm_compute::TensorInfo aclInputInfo =
27  BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo =
29  BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
30 
31  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
32 
33  return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
34 }

Referenced by NeonLayerSupport::IsPooling2dSupported().

◆ NeonPooling3dWorkloadValidate()

arm_compute::Status NeonPooling3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Pooling3dDescriptor descriptor 
)

Definition at line 15 of file NeonPooling3dWorkload.cpp.

18  {
19  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
22  return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
23  }

Referenced by NeonLayerSupport::IsPooling3dSupported().

◆ NeonPreluWorkloadValidate()

arm_compute::Status NeonPreluWorkloadValidate ( const TensorInfo input,
const TensorInfo alpha,
const TensorInfo output 
)

Definition at line 17 of file NeonPreluWorkload.cpp.

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::NEPReluLayer::validate(&aclInput,
26  &aclAlpha,
27  &aclOutput);
28 }

Referenced by NeonLayerSupport::IsPreluSupported().

◆ NeonQLstmWorkloadValidate()

arm_compute::Status NeonQLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo cellStateIn,
const TensorInfo outputStateIn,
const TensorInfo cellStateOut,
const TensorInfo outputStateOut,
const TensorInfo output,
const QLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 243 of file NeonQLstmWorkload.cpp.

251 {
252  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
253 
254  // Input/Output tensor info
255  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
256  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
257  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
258 
259  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
260  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
261  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
262 
263  // Mandatory tensor info
264  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
266  const arm_compute::TensorInfo aclInputToCellWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
268  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
270  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
272  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
274  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
276  const arm_compute::TensorInfo aclForgetGateBiasInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
278  const arm_compute::TensorInfo aclCellBiasInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
280  const arm_compute::TensorInfo aclOutputGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
282 
283  // Optional tensor info
284  arm_compute::TensorInfo aclInputToInputWeightsInfo;
285  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
286 
287  arm_compute::TensorInfo aclCellToInputWeightsInfo;
288  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
289  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
290 
291  arm_compute::TensorInfo aclInputGateBiasInfo;
292 
293  arm_compute::TensorInfo aclProjectionWeightsInfo;
294  arm_compute::TensorInfo aclProjectionBiasInfo;
295 
296  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
297  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
298  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
299  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
300 
301  // Create tensor info for optional params if they are enabled
302  if (descriptor.m_PeepholeEnabled)
303  {
304  if (!descriptor.m_CifgEnabled)
305  {
306  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
307  }
308 
309  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
310  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
311 
312  // Set peephole params info
313  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
314  &aclCellToOutputWeightsInfo);
315  }
316 
317  if (descriptor.m_ProjectionEnabled)
318  {
319  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
320 
321  if (paramsInfo.m_ProjectionBias != nullptr)
322  {
323  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
324  }
325 
326  // Set projection params info
327  aclParamsInfo.set_projection_params(
328  &aclProjectionWeightsInfo,
329  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
330  }
331 
332  if (descriptor.m_LayerNormEnabled)
333  {
334  if (!descriptor.m_CifgEnabled)
335  {
336  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
337  }
338 
339  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
340  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
341  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
342 
343  // Set layer norm params info
344  aclParamsInfo.set_layer_normalization_params(
345  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
346  &aclForgetLayerNormWeightsInfo,
347  &aclCellLayerNormWeightsInfo,
348  &aclOutputLayerNormWeightsInfo);
349  }
350 
351  if (!descriptor.m_CifgEnabled)
352  {
353  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
354  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
355  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
356 
357  // Set CIFG params info
358  aclParamsInfo.set_cifg_params(
359  &aclInputToInputWeightsInfo,
360  &aclRecurrentToInputWeightsInfo,
361  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
362  &aclInputGateBiasInfo);
363  }
364 
365  // Set scalar descriptor params
366  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
367  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
368  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
369  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
370  descriptor.m_ForgetIntermediateScale,
371  descriptor.m_CellIntermediateScale,
372  descriptor.m_OutputIntermediateScale);
373 
374  // QLSTM NEON validate
375  return arm_compute::NEQLSTMLayer::validate(&aclInputInfo,
376  &aclInputToForgetWeightsInfo,
377  &aclInputToCellWeightsInfo,
378  &aclInputToOutputWeightsInfo,
379  &aclRecurrentToForgetWeightsInfo,
380  &aclRecurrentToCellWeightsInfo,
381  &aclRecurrentToOutputWeightsInfo,
382  &aclForgetGateBiasInfo,
383  &aclCellBiasInfo,
384  &aclOutputGateBiasInfo,
385  &aclCellStateInInfo,
386  &aclOutputStateInInfo,
387  &aclCellStateOutInfo,
388  &aclOutputStateOutInfo,
389  &aclOutputInfo,
390  aclParamsInfo);
391 }

Referenced by NeonLayerSupport::IsQLstmSupported().

◆ NeonQuantizedLstmWorkloadValidate()

arm_compute::Status NeonQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo cellStateIn,
const TensorInfo outputStateIn,
const TensorInfo cellStateOut,
const TensorInfo outputStateOut,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 131 of file NeonQuantizedLstmWorkload.cpp.

137 {
138  // The inputs and outputs
139  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
140  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
141  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
142  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
143  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
144 
145  // Basic parameters
146  const arm_compute::TensorInfo aclInputToInputWeightsInfo
147  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
148  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
149  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
150  const arm_compute::TensorInfo aclInputToCellWeightsInfo
151  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
152  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
153  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
154 
155  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
156  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
157  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
158  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
159  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
160  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
161  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
162  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
163 
164  const arm_compute::TensorInfo aclInputGateBiasInfo
165  = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
166  const arm_compute::TensorInfo aclForgetGateBiasInfo
167  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
168  const arm_compute::TensorInfo aclCellBiasInfo
169  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
170  const arm_compute::TensorInfo aclOutputGateBiasInfo
171  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
172 
173  return arm_compute::NELSTMLayerQuantized::validate(&aclInputInfo,
174  &aclInputToInputWeightsInfo,
175  &aclInputToForgetWeightsInfo,
176  &aclInputToCellWeightsInfo,
177  &aclInputToOutputWeightsInfo,
178  &aclRecurrentToInputWeightsInfo,
179  &aclRecurrentToForgetWeightsInfo,
180  &aclRecurrentToCellWeightsInfo,
181  &aclRecurrentToOutputWeightsInfo,
182  &aclInputGateBiasInfo,
183  &aclForgetGateBiasInfo,
184  &aclCellBiasInfo,
185  &aclOutputGateBiasInfo,
186  &aclCellStateInInfo,
187  &aclOutputStateInInfo,
188  &aclCellStateOutInfo,
189  &aclOutputStateOutInfo);
190 }

Referenced by NeonLayerSupport::IsQuantizedLstmSupported().

◆ NeonQuantizeWorkloadValidate()

arm_compute::Status NeonQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file NeonQuantizeWorkload.cpp.

19 {
20  const arm_compute::TensorInfo neonInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEQuantizationLayer::validate(&neonInputInfo, &neonOutputInfo);
24 }

Referenced by NeonLayerSupport::IsQuantizeSupported().
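A minimal usage sketch (not from the ArmNN sources): an ahead-of-time support check that calls the validate helper and turns the returned ACL status into a bool. The header path, shapes and quantization parameters are illustrative assumptions.

#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>
#include "NeonQuantizeWorkload.hpp" // assumed location of the NeonQuantizeWorkloadValidate declaration

bool CanRunNeonQuantize()
{
    // Float32 input quantized to a QAsymmS8 output with a matching shape.
    armnn::TensorInfo input({ 1, 16 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 16 }, armnn::DataType::QAsymmS8, 0.05f, 0);

    arm_compute::Status status = armnn::NeonQuantizeWorkloadValidate(input, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}

The same pattern applies to the other Neon*WorkloadValidate helpers on this page: build the candidate TensorInfos (and descriptor, if any), call the helper, and only create the workload when the returned status is OK.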

◆ NeonReduceWorkloadValidate()

arm_compute::Status NeonReduceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ReduceDescriptor &  descriptor 
)

Definition at line 19 of file NeonReduceWorkload.cpp.

22 {
23  if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
24  {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
29  input.GetNumDimensions(),
30  descriptor.m_vAxis);
31 
32  return arm_compute::NEReductionOperation::validate(&aclInputInfo,
33  &aclOutputInfo,
34  static_cast<unsigned int>(coords[0]),
35  ConvertReductionOperationToAcl(descriptor),
36  descriptor.m_KeepDims);
37  }
38  else
39  {
40  // Validate layer if there are multiple axes.
41  arm_compute::Status status;
42  IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
43  return status;
44  }
45 }

References ReduceDescriptor::m_vAxis.

Referenced by NeonLayerSupport::IsReduceSupported().

◆ NeonReshapeWorkloadValidate()

arm_compute::Status NeonReshapeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonReshapeWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
24 }

Referenced by NeonLayerSupport::IsReshapeSupported().

◆ NeonResizeWorkloadValidate()

arm_compute::Status NeonResizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ResizeDescriptor &  descriptor 
)

Definition at line 22 of file NeonResizeWorkload.cpp.

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  bool usePadding = false;
40 
41  return arm_compute::NEScale::validate(&aclInputInfo,
42  &aclOutputInfo,
43  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
44  arm_compute::BorderMode::REPLICATE,
45  arm_compute::PixelValue(0.f),
46  samplingPolicy,
47  usePadding,
48  descriptor.m_AlignCorners));
49 
50 }

Referenced by NeonLayerSupport::IsResizeSupported().

◆ NeonReverseV2WorkloadValidate()

arm_compute::Status NeonReverseV2WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  axis,
const TensorInfo &  output 
)

Definition at line 14 of file NeonReverseV2Workload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
23 }

Referenced by NeonLayerSupport::IsReverseV2Supported().

◆ NeonRsqrtWorkloadValidate()

arm_compute::Status NeonRsqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonRsqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NERsqrtLayer::validate(&aclInput, &aclOutput);
24 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSinWorkloadValidate()

arm_compute::Status NeonSinWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonSinWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NESinLayer::validate(&aclInput, &aclOutput);
23 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonSliceWorkloadValidate()

arm_compute::Status NeonSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SliceDescriptor &  descriptor 
)

Definition at line 21 of file NeonSliceWorkload.cpp.

24 {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30 
31  std::tie(starts, ends) = SetNeonSliceData(descriptor.m_Begin, descriptor.m_Size);
32 
33  return arm_compute::NESlice::validate(&aclInputInfo, &aclOutputInfo, starts, ends);
34 }

Referenced by NeonLayerSupport::IsSliceSupported().

◆ NeonSoftmaxWorkloadValidate()

arm_compute::Status NeonSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 19 of file NeonSoftmaxWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NESoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }

Referenced by NeonLayerSupport::IsSoftmaxSupported().
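A minimal sketch (assumed shapes and header location) showing how the descriptor's beta and axis feed into the validate call; a negative m_Axis selects the innermost dimension, which ComputeAclAxis maps onto ACL's reversed dimension order.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>
#include "NeonSoftmaxWorkload.hpp" // assumed location of the NeonSoftmaxWorkloadValidate declaration

bool CanRunNeonSoftmax()
{
    armnn::SoftmaxDescriptor softmaxDesc;
    softmaxDesc.m_Beta = 1.0f;
    softmaxDesc.m_Axis = -1; // innermost dimension

    armnn::TensorInfo info({ 2, 10 }, armnn::DataType::Float32);
    arm_compute::Status status = armnn::NeonSoftmaxWorkloadValidate(info, info, softmaxDesc);
    return status.error_code() == arm_compute::ErrorCode::OK;
}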

◆ NeonSpaceToBatchNdWorkloadValidate()

arm_compute::Status NeonSpaceToBatchNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToBatchNdDescriptor &  descriptor 
)

Definition at line 15 of file NeonSpaceToBatchNdWorkload.cpp.

18 {
19  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21 
22  arm_compute::Status statusSpaceToBatch = arm_compute::Status(arm_compute::ErrorCode::OK);
23  arm_compute::Status statusReshapeInput = arm_compute::Status(arm_compute::ErrorCode::OK);
24  arm_compute::Status statusReshapeOutput = arm_compute::Status(arm_compute::ErrorCode::OK);
25 
26  arm_compute::TensorInfo aclReshapeInputInfo = aclInputInfo;
27  arm_compute::TensorInfo aclReshapeOutputInfo = aclOutputInfo;
28 
29  // When a spatial dimension is missing (rank=3), set W to 1
30  const unsigned int rank = input.GetNumDimensions();
31  if (rank == 3)
32  {
33  const arm_compute::TensorShape inputShape = aclInputInfo.tensor_shape();
34  const arm_compute::TensorShape outputShape = aclOutputInfo.tensor_shape();
35 
36  if (descriptor.m_DataLayout == armnn::DataLayout::NHWC)
37  {
38  // In ACL dimensions are right to left: C, W, H, N
39  aclReshapeInputInfo.set_tensor_shape({inputShape.x(), 1, inputShape.y(), inputShape.z()});
40  aclReshapeOutputInfo.set_tensor_shape({outputShape.x(), 1, outputShape.y(), outputShape.z()});
41  }
42  else if (descriptor.m_DataLayout == armnn::DataLayout::NCHW)
43  {
44  // In ACL dimensions are right to left: W, H, C, N
45  aclReshapeInputInfo.set_tensor_shape({1, inputShape.x(), inputShape.y(), inputShape.z()});
46  aclReshapeOutputInfo.set_tensor_shape({1, outputShape.x(), outputShape.y(), outputShape.z()});
47  }
48  else
49  {
50  throw InvalidArgumentException("Unsupported or unknown DataLayout", CHECK_LOCATION());
51  }
52 
53  statusReshapeInput = arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclReshapeInputInfo);
54  statusReshapeOutput = arm_compute::NEReshapeLayer::validate(&aclReshapeOutputInfo, &aclOutputInfo);
55  }
56 
57  // ArmNN blockShape is [H, W]; ACL expects W, H
58  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
59  int32_t blockWidth = (rank == 3) ? 1 : armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
60 
61  unsigned int padLeft = (rank == 3) ? 0 : descriptor.m_PadList[1].first;
62  unsigned int padRight = (rank == 3) ? 0 : descriptor.m_PadList[1].second;
63  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(padLeft,
64  descriptor.m_PadList[0].first);
65  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(padRight,
66  descriptor.m_PadList[0].second);
67 
68  statusSpaceToBatch = arm_compute::NESpaceToBatchLayer::validate(rank == 3 ? &aclReshapeInputInfo : &aclInputInfo,
69  blockWidth,
70  blockHeight,
71  paddingLeftTop,
72  paddingRightBottom,
73  rank == 3 ? &aclReshapeOutputInfo : &aclOutputInfo);
74 
75  if (statusReshapeInput.error_code() == arm_compute::ErrorCode::OK &&
76  statusReshapeOutput.error_code() == arm_compute::ErrorCode::OK &&
77  statusSpaceToBatch.error_code() == arm_compute::ErrorCode::OK)
78  {
79  return arm_compute::Status(arm_compute::ErrorCode::OK,
80  "All SpaceToBatch layers validate status OK.");
81  }
82  else
83  {
84  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
85  "SpaceToBatch layer validate status failed."
86  + statusSpaceToBatch.error_description()
87  + statusReshapeInput.error_description()
88  + statusReshapeOutput.error_description());
89  }
90 }

Referenced by NeonLayerSupport::IsSpaceToBatchNdSupported().

◆ NeonSpaceToDepthWorkloadValidate()

arm_compute::Status NeonSpaceToDepthWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToDepthDescriptor &  descriptor 
)

Definition at line 19 of file NeonSpaceToDepthWorkload.cpp.

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsSpaceToDepthSupported().

◆ NeonSplitterWorkloadValidate()

arm_compute::Status NeonSplitterWorkloadValidate ( const TensorInfo &  input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 32 of file NeonSplitterWorkload.cpp.

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
37 
38  size_t numOutputs = outputs.size();
39 
40  std::vector<arm_compute::TensorInfo> aclOutputs;
41  aclOutputs.reserve(numOutputs);
42 
43  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
44  aclOutputPtr.reserve(numOutputs);
45 
46  for (size_t i = 0u; i < outputs.size(); ++i)
47  {
48  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
49  aclOutputPtr.emplace_back(&aclOutputs.back());
50  }
51 
52  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
53  return arm_compute::NESplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
54 }

Referenced by NeonLayerSupport::IsSplitterSupported().
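A minimal sketch (assumed shapes and header location) of the std::reference_wrapper output list: splitting a [4, 8] tensor into two [2, 8] outputs along axis 0.

#include <functional>
#include <vector>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>
#include "NeonSplitterWorkload.hpp" // assumed location of the NeonSplitterWorkloadValidate declaration

bool CanRunNeonSplitter()
{
    armnn::TensorInfo input({ 4, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo out0({ 2, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo out1({ 2, 8 }, armnn::DataType::Float32);

    // The outputs are passed by reference so the helper can read each TensorInfo in place.
    std::vector<std::reference_wrapper<armnn::TensorInfo>> outputs{ out0, out1 };

    arm_compute::Status status = armnn::NeonSplitterWorkloadValidate(input, outputs, /*splitAxis=*/0);
    return status.error_code() == arm_compute::ErrorCode::OK;
}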

◆ NeonSqrtWorkloadValidate()

arm_compute::Status NeonSqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonSqrtWorkload.cpp.

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  ActivationDescriptor descriptor;
24  descriptor.m_Function = ActivationFunction::Sqrt;
25  const arm_compute::ActivationLayerInfo activationLayerInfo =
26  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
27 
28  return arm_compute::NEActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
29 }

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

◆ NeonStackWorkloadValidate()

arm_compute::Status NeonStackWorkloadValidate ( const std::vector< const TensorInfo * > &  inputs,
const TensorInfo &  output,
const StackDescriptor &  descriptor 
)

Definition at line 27 of file NeonStackWorkload.cpp.

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37 
38  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
45  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
46  return arm_compute::NEStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
47 }

Referenced by NeonLayerSupport::IsStackSupported().

◆ NeonStridedSliceWorkloadValidate()

arm_compute::Status NeonStridedSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const StridedSliceDescriptor &  descriptor 
)

Definition at line 19 of file NeonStridedSliceWorkload.cpp.

22 {
23  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input,
24  descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output,
26  descriptor.m_DataLayout);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30  arm_compute::Coordinates strides;
31 
32  std::tie(starts, ends, strides) = SetNeonStridedSliceData(descriptor.m_Begin,
33  descriptor.m_End,
34  descriptor.m_Stride);
35 
36  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
37  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
38  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
39  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
40 
41  return arm_compute::NEStridedSlice::validate(&aclInput,
42  &aclOutput,
43  starts,
44  ends,
45  strides,
46  begin_mask,
47  end_mask,
48  shrink_axis_mask);
49 }

Referenced by NeonLayerSupport::IsStridedSliceSupported().

◆ NeonSubtractionWorkloadValidate()

arm_compute::Status NeonSubtractionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 22 of file NeonSubtractionWorkload.cpp.

26 {
27  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
28  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
29  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
35  &aclInput1,
36  &aclOutput,
37  arm_compute::ConvertPolicy::SATURATE,
38  activationInfo);
39 }

Referenced by IsLayerTypeSupported(), NeonLayerSupport::IsSubtractionSupported(), and NeonBackend::OptimizeSubgraphView().

◆ NeonTensorHandleFactoryId()

constexpr const char* armnn::NeonTensorHandleFactoryId ( )
constexpr

Definition at line 14 of file NeonTensorHandleFactory.hpp.

14 { return "Arm/Neon/TensorHandleFactory"; }

Referenced by NeonTensorHandleFactory::GetIdStatic().

◆ NeonTileWorkloadValidate()

arm_compute::Status NeonTileWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TileDescriptor &  descriptor 
)

Definition at line 14 of file NeonTileWorkload.cpp.

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
20 
21  std::vector<unsigned int> aclMultiples = descriptor.m_Multiples;
22  std::reverse(aclMultiples.begin(),aclMultiples.end());
23 
24  return arm_compute::NETile::validate(&aclInput, &aclOutput, aclMultiples);
25 }

Referenced by NeonLayerSupport::IsTileSupported().

◆ NeonTransposeConvolution2dWorkloadValidate()

arm_compute::Status NeonTransposeConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 25 of file NeonTransposeConvolution2dWorkload.cpp.

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34 
35  arm_compute::TensorInfo aclBiasesInfo;
36  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
37 
38  if (descriptor.m_BiasEnabled)
39  {
40  ARMNN_ASSERT(biases.has_value());
41 
42  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
43  optionalAclBiasesInfo = &aclBiasesInfo;
44  }
45 
46  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
47 
48  return arm_compute::NEDeconvolutionLayer::validate(&aclInputInfo,
49  &aclWeightsInfo,
50  optionalAclBiasesInfo,
51  &aclOutputInfo,
52  layerInfo);
53 }

Referenced by NeonLayerSupport::IsTransposeConvolution2dSupported().
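A minimal sketch (shapes, strides and header location are illustrative assumptions) showing how the optional bias is passed; the helper only reads biases when descriptor.m_BiasEnabled is true.

#include <armnn/Descriptors.hpp>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <arm_compute/core/Error.h>
#include "NeonTransposeConvolution2dWorkload.hpp" // assumed header location

bool CanRunNeonTransposeConv2d()
{
    armnn::TransposeConvolution2dDescriptor desc;
    desc.m_StrideX = 2;
    desc.m_StrideY = 2;
    desc.m_BiasEnabled = true;
    desc.m_DataLayout = armnn::DataLayout::NHWC;

    armnn::TensorInfo input({ 1, 8, 8, 16 }, armnn::DataType::Float32);   // N H W C
    armnn::TensorInfo weights({ 16, 3, 3, 16 }, armnn::DataType::Float32);
    armnn::TensorInfo bias({ 16 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 16, 16, 16 }, armnn::DataType::Float32);

    arm_compute::Status status = armnn::NeonTransposeConvolution2dWorkloadValidate(
        input, output, desc, weights, armnn::Optional<armnn::TensorInfo>(bias));
    return status.error_code() == arm_compute::ErrorCode::OK;
}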

◆ NeonTransposeWorkloadValidate()

arm_compute::Status NeonTransposeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeDescriptor &  descriptor 
)

Definition at line 15 of file NeonTransposeWorkload.cpp.

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
25 }

Referenced by NeonLayerSupport::IsTransposeSupported().

◆ NeonUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 510 of file NeonUnidirectionalSequenceLstmFloatWorkload.cpp.

518 {
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = output.GetShape();
521 
522  if (inputLayerShape.GetNumDimensions() != 3)
523  {
524  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
525  "Unidirectional Sequence LSTM layer validate status failed.");
526  }
527 
528  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
529  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
530  unsigned int inputSize = inputLayerShape[2];
531  unsigned int outputSize = outputLayerShape[2];
532 
533  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
534  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
535 
536  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Permute1 status");
538  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Split status");
540  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
541  "LSTM status");
542  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
543  "Concat status");
544  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
545  "Permute2 status");
546 
547  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
548  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
549 
550  //
551  // Permute validate
552  //
553  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
554  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
555  if (!descriptor.m_TimeMajor)
556  {
557  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
558  &aclPermuteOutInfo,
559  arm_compute::PermutationVector(0U, 2U, 1U));
560  }
561 
562  //
563  // Split and Concat Tensors validate
564  //
565  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
566  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
567  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
568  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
569  splitterOutputsTensorInfos.reserve(maxTime);
570  concatInputsTensorInfos.reserve(maxTime);
571  for (unsigned int i = 0; i < maxTime; ++i)
572  {
573  arm_compute::TensorInfo splitter_out;
574  arm_compute::TensorInfo concat_in;
575 
576  auto splitterTensorInfo = TensorInfo(input);
577  auto concatTensorInfo = TensorInfo(output);
578  splitterTensorInfo.SetShape({batchSize, inputSize});
579  concatTensorInfo.SetShape({batchSize, outputSize});
580 
581  arm_compute::TensorInfo aclSplitterTensorInfo
582  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
583  arm_compute::TensorInfo aclConcatTensorInfo
584  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
585 
586  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
587  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
588  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
589  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
590  }
591 
592  //
593  // Split validate
594  //
595  unsigned int numberDimensions = 3;
596  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
597  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
598 
599  if (maxTime != 1) // ACL split does not work with only one element to split.
600  {
601  if (!descriptor.m_TimeMajor)
602  {
603  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
604  splitterOutputsTensorInfosPtr,
605  aclAxisSplit);
606  }
607  else
608  {
609  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
610  }
611  }
612 
613  //
614  // LSTM validate
615  //
616 
617  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
618 
619  unsigned int numUnits = cellStateIn.GetShape()[1];
620  unsigned int scratchBufferFactor = 4;
621 
622  if (descriptor.m_CifgEnabled)
623  {
624  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
625  scratchBufferFactor = 3;
626  }
627 
628  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
629 
630  // The inputs and outputs
631  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
632  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
633  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
634  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
635  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
636 
637  // Basic parameters
638  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
640  const arm_compute::TensorInfo aclInputToCellWeightsInfo
641  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
642  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
643  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
644  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
645  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
646  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
647  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
648  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
649  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
650  const arm_compute::TensorInfo aclForgetGateBiasInfo
651  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
652  const arm_compute::TensorInfo aclCellBiasInfo
653  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
654  const arm_compute::TensorInfo aclOutputGateBiasInfo
655  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
656 
657  arm_compute::TensorInfo aclInputToInputWeightsInfo;
658  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
659  arm_compute::TensorInfo aclCellToInputWeightsInfo;
660  arm_compute::TensorInfo aclInputGateBiasInfo;
661  arm_compute::TensorInfo aclProjectionWeightsInfo;
662  arm_compute::TensorInfo aclProjectionBiasInfo;
663  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
664  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
665 
666  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
667  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
668  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
669  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
670 
671 
672  if (!descriptor.m_CifgEnabled)
673  {
674  if (descriptor.m_PeepholeEnabled)
675  {
676  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
677  }
678  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
679  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
680  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
681 
682  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
683  &aclRecurrentToInputWeightsInfo,
684  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
685  &aclInputGateBiasInfo);
686  }
687 
688  if (descriptor.m_ProjectionEnabled)
689  {
690  if (paramsInfo.m_ProjectionBias != nullptr)
691  {
692  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
693  }
694  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
695 
696  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
697  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
698  }
699 
700  if (descriptor.m_PeepholeEnabled)
701  {
702  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
703  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
704 
705  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
706  }
707 
708  if (descriptor.m_LayerNormEnabled)
709  {
710  if (!descriptor.m_CifgEnabled)
711  {
712  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
713  }
714  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
715  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
716  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
717 
718  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
719  &aclInputLayerNormWeightsInfo,
720  &aclForgetLayerNormWeightsInfo,
721  &aclCellLayerNormWeightsInfo,
722  &aclOutputLayerNormWeightsInfo);
723  }
724 
725  // Needs to be set to a negative threshold to be compatible with ACL
726  float cell_threshold = descriptor.m_ClippingThresCell;
727  float projection_threshold = descriptor.m_ClippingThresProj;
728 
729  arm_compute::ActivationLayerInfo activationLayerInfo =
730  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
731 
732  for (unsigned int i = 0; i != maxTime; ++i)
733  {
734 
735  // Set LSTM input and output ITensors depending on:
736  // input format (timeMajor) & number of LSTM batches (maxTime).
737  arm_compute::ITensorInfo* outputLSTM;
738  arm_compute::ITensorInfo* inputLSTM;
739 
740  // If there is only one LSTM time major batch, we will not concat OR permute.
741  // Set input of LSTM to be first input ITensor.
742  // Set output of LSTM to be final output ITensor.
743  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
744  if (maxTime == 1 && descriptor.m_TimeMajor)
745  {
746  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
747  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
748 
749  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
750  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
751 
752  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
753  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
754 
755  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
756  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
757 
758  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
759  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
760  }
761  // If there is only one LSTM batch major batch, we will not concat, only permute.
762  // Set input of LSTM to be output of initial permute.
763  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
764  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
765  else if (maxTime == 1 && !descriptor.m_TimeMajor)
766  {
767  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
768  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
769  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
770  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
771  inputLSTM = &aclPermuteOutInfo;
772 
773  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
774  }
775  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
776  else
777  {
778  inputLSTM = splitterOutputsTensorInfosPtr[i];
779  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
780  }
781 
782  statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,
783  &aclInputToForgetWeightsInfo,
784  &aclInputToCellWeightsInfo,
785  &aclInputToOutputWeightsInfo,
786  &aclRecurrentToForgetWeightsInfo,
787  &aclRecurrentToCellWeightsInfo,
788  &aclRecurrentToOutputWeightsInfo,
789  &aclForgetGateBiasInfo,
790  &aclCellBiasInfo,
791  &aclOutputGateBiasInfo,
792  &aclOutputStateInInfo,
793  &aclCellStateInInfo,
794  &aclScratchBufferInfo,
795  &aclOutputStateOutInfo,
796  &aclCellStateOutInfo,
797  outputLSTM,
798  lstm_params_info,
799  activationLayerInfo,
800  cell_threshold,
801  projection_threshold);
802 
803  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
804  {
805  break;
806  }
807  }
808 
809  //
810  // Concat validate
811  //
812 
813  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
814  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
815  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
816  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
817 
818  TensorInfo concatOutputTensorInfo = TensorInfo(output);
819  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
820  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
821 
822  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
823  {
824  for (unsigned int i = 0; i < maxTime; ++i)
825  {
826  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
827  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
828  }
829 
830  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
831  if (!descriptor.m_TimeMajor)
832  {
833  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
834  &aclConcatOutputTensorInfo,
835  aclAxisConcat);
836  }
837  else
838  {
839  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
840  &aclOutputInfo,
841  aclAxisConcat);
842  }
843  }
844  // If only one LSTM batch, we do not concat and/or permute.
845  // Must ensure final output info is expanded to correct batch major dimensions.
846  else
847  {
848  if (!descriptor.m_TimeMajor)
849  {
850  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
851  BuildArmComputeTensorShape(shapeExpandBatchMajor));
852  }
853  else
854  {
855  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
856  BuildArmComputeTensorShape(shapeExpandTimeMajor));
857  }
858  }
859 
860  //
861  // Permute validate
862  //
863  if (!descriptor.m_TimeMajor)
864  {
865  // Output now time major. Permute output back to batch major.
866  if (maxTime != 1)
867  {
868  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
869  &aclOutputInfo,
870  arm_compute::PermutationVector(0U, 2U, 1U));
871  }
872  else
873  {
874  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
875  &aclOutputInfo,
876  arm_compute::PermutationVector(0U, 2U, 1U));
877  }
878  }
879 
880  auto okCode = arm_compute::ErrorCode::OK;
881  if (statusPermute1.error_code() == okCode &&
882  statusSplit.error_code() == okCode &&
883  statusLSTM .error_code() == okCode &&
884  statusConcat.error_code() == okCode &&
885  statusPermute2.error_code() == okCode)
886  {
887  return arm_compute::Status(arm_compute::ErrorCode::OK,
888  "All Unidirectional Sequence LSTM layer validate status OK.");
889  }
890  else
891  {
892  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
893  "Unidirectional Sequence LSTM layer validate status failed.");
894  }
895 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NeonUnidirectionalSequenceLstmWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 491 of file NeonUnidirectionalSequenceLstmWorkload.cpp.

499 {
500  TensorShape inputLayerShape = input.GetShape();
501  TensorShape outputLayerShape = output.GetShape();
502 
503  if (inputLayerShape.GetNumDimensions() != 3)
504  {
505  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
506  "Unidirectional Sequence LSTM layer validate status failed.");
507  }
508 
509  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
510  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
511  unsigned int inputSize = inputLayerShape[2];
512  unsigned int outputSize = outputLayerShape[2];
513 
514  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
515  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
516 
517  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
518  "Permute1 status");
519  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
520  "Split status");
521  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
522  "LSTM status");
523  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
524  "Concat status");
525  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
526  "Permute2 status");
527 
528  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
529  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
530 
531  //
532  // Permute validate
533  //
534  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
535  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
536  if (!descriptor.m_TimeMajor)
537  {
538  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
539  &aclPermuteOutInfo,
540  arm_compute::PermutationVector(0U, 2U, 1U));
541  }
542 
543  //
544  // Split and Concat Tensors validate
545  //
546  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
547  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
548  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
549  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
550  splitterOutputsTensorInfos.reserve(maxTime);
551  concatInputsTensorInfos.reserve(maxTime);
552  for (unsigned int i = 0; i < maxTime; ++i)
553  {
554  arm_compute::TensorInfo splitter_out;
555  arm_compute::TensorInfo concat_in;
556 
557  auto splitterTensorInfo = TensorInfo(input);
558  auto concatTensorInfo = TensorInfo(output);
559  splitterTensorInfo.SetShape({batchSize, inputSize});
560  concatTensorInfo.SetShape({batchSize, outputSize});
561 
562  arm_compute::TensorInfo aclSplitterTensorInfo
563  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
564  arm_compute::TensorInfo aclConcatTensorInfo
565  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
566 
567  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
568  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
569  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
570  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
571  }
572 
573  //
574  // Split validate
575  //
576  unsigned int numberDimensions = 3;
577  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
578  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
579 
580  if (maxTime != 1) // ACL split does not work with only one element to split.
581  {
582  if (!descriptor.m_TimeMajor)
583  {
584  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
585  splitterOutputsTensorInfosPtr,
586  aclAxisSplit);
587  } else
588  {
589  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
590  }
591  }
592 
593  //
594  // LSTM validate
595  //
596 
597  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
598 
599  unsigned int numUnits = cellStateIn.GetShape()[1];
600  unsigned int scratchBufferFactor = 4;
601 
602  if (descriptor.m_CifgEnabled)
603  {
604  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
605  scratchBufferFactor = 3;
606  }
607 
608  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
609 
610 
611  lstm_params_info.set_cell_clip_params(descriptor.m_ClippingThresCell);
612  lstm_params_info.set_projection_clip_params(descriptor.m_ClippingThresProj);
613  // The inputs and outputs
614  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
615  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
616  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
617  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
618  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
619 
620  // Basic parameters
621  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
622  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
623  const arm_compute::TensorInfo aclInputToCellWeightsInfo
624  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
625  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
626  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
627  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
628  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
629  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
630  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
631  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
632  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
633  const arm_compute::TensorInfo aclForgetGateBiasInfo
634  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
635  const arm_compute::TensorInfo aclCellBiasInfo
636  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
637  const arm_compute::TensorInfo aclOutputGateBiasInfo
638  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
639 
640  arm_compute::TensorInfo aclInputToInputWeightsInfo;
641  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
642  arm_compute::TensorInfo aclCellToInputWeightsInfo;
643  arm_compute::TensorInfo aclInputGateBiasInfo;
644  arm_compute::TensorInfo aclProjectionWeightsInfo;
645  arm_compute::TensorInfo aclProjectionBiasInfo;
646  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
647  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
648 
649  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
650  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
651  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
652  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
653 
654  if (!descriptor.m_CifgEnabled)
655  {
656  if (descriptor.m_PeepholeEnabled)
657  {
658  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
659  }
660  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
661  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
662  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
663 
664  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
665  &aclRecurrentToInputWeightsInfo,
666  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
667  &aclInputGateBiasInfo);
668  }
669 
670  if (descriptor.m_ProjectionEnabled)
671  {
672  if (paramsInfo.m_ProjectionBias != nullptr)
673  {
674  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
675  }
676  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
677 
678  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
679  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
680  }
681 
682  if (descriptor.m_PeepholeEnabled)
683  {
684  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
685  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
686 
687  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
688  }
689 
690  if (descriptor.m_LayerNormEnabled)
691  {
692  if (!descriptor.m_CifgEnabled)
693  {
694  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
695  }
696  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
697  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
698  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
699 
700  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
701  &aclInputLayerNormWeightsInfo,
702  &aclForgetLayerNormWeightsInfo,
703  &aclCellLayerNormWeightsInfo,
704  &aclOutputLayerNormWeightsInfo);
705  }
706 
707  lstm_params_info.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
708  descriptor.m_ForgetIntermediateScale,
709  descriptor.m_CellIntermediateScale,
710  descriptor.m_OutputIntermediateScale);
711 
712  lstm_params_info.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
713 
714  for (unsigned int i = 0; i != maxTime; ++i)
715  {
716 
717  // Set LSTM input and output ITensors depending on:
718  // input format (timeMajor) & number of LSTM batches (maxTime).
719  arm_compute::ITensorInfo* outputLSTM;
720  arm_compute::ITensorInfo* inputLSTM;
721 
722  // If there is only one LSTM time major batch, we will not concat OR permute.
723  // Set input of LSTM to be first input ITensor.
724  // Set output of LSTM to be final output ITensor.
725  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
726  if (maxTime == 1 && descriptor.m_TimeMajor)
727  {
728  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
729  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
730 
731  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
732  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
733 
734  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
735  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
736 
737  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
738  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
739 
740  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
741  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
742  }
743  // If there is only one LSTM batch major batch, we will not concat, only permute.
744  // Set input of LSTM to be output of initial permute.
745  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
746  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
747  else if (maxTime == 1 && !descriptor.m_TimeMajor)
748  {
749  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
750  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
751  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
752  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
753  inputLSTM = &aclPermuteOutInfo;
754 
755  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
756  }
757  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
758  else
759  {
760  inputLSTM = splitterOutputsTensorInfosPtr[i];
761  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
762  }
763 
764  statusLSTM = arm_compute::NEQLSTMLayer::validate(inputLSTM,
765  &aclInputToForgetWeightsInfo,
766  &aclInputToCellWeightsInfo,
767  &aclInputToOutputWeightsInfo,
768  &aclRecurrentToForgetWeightsInfo,
769  &aclRecurrentToCellWeightsInfo,
770  &aclRecurrentToOutputWeightsInfo,
771  &aclForgetGateBiasInfo,
772  &aclCellBiasInfo,
773  &aclOutputGateBiasInfo,
774  &aclCellStateInInfo,
775  &aclOutputStateInInfo,
776  &aclCellStateOutInfo,
777  &aclOutputStateOutInfo,
778  outputLSTM,
779  lstm_params_info);
780  }
781 
782  //
783  // Concat validate
784  //
785 
786  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
787  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
788  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
789  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
790 
791  TensorInfo concatOutputTensorInfo = TensorInfo(output);
792  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
793  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
794 
795  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
796  {
797  for (unsigned int i = 0; i < maxTime; ++i)
798  {
799  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
800  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
801  }
802 
803  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
804  if (!descriptor.m_TimeMajor)
805  {
806  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
807  &aclConcatOutputTensorInfo,
808  aclAxisConcat);
809  }
810  else
811  {
812  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
813  &aclOutputInfo,
814  aclAxisConcat);
815  }
816  }
817  // If only one LSTM batch, we do not concat and/or permute.
818  // Must ensure final output info is expanded to correct batch major dimensions.
819  else
820  {
821  if (!descriptor.m_TimeMajor)
822  {
823  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
824  BuildArmComputeTensorShape(shapeExpandBatchMajor));
825  }
826  else
827  {
828  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
829  BuildArmComputeTensorShape(shapeExpandTimeMajor));
830  }
831  }
832 
833  //
834  // Permute validate
835  //
836  if (!descriptor.m_TimeMajor)
837  {
838  // Output now time major. Permute output back to batch major.
839  if (maxTime != 1)
840  {
841  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
842  &aclOutputInfo,
843  arm_compute::PermutationVector(0U, 2U, 1U));
844  }
845  else
846  {
847  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
848  &aclOutputInfo,
849  arm_compute::PermutationVector(0U, 2U, 1U));
850  }
851  }
852 
853  auto okCode = arm_compute::ErrorCode::OK;
854  if (statusPermute1.error_code() == okCode &&
855  statusSplit.error_code() == okCode &&
856  statusLSTM .error_code() == okCode &&
857  statusConcat.error_code() == okCode &&
858  statusPermute2.error_code() == okCode)
859  {
860  return arm_compute::Status(arm_compute::ErrorCode::OK,
861  "All Unidirectional Sequence LSTM layer validate status OK.");
862  }
863  else
864  {
865  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
866  "Unidirectional Sequence LSTM layer validate status failed.");
867  }
868 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

◆ NextIndex()

bool armnn::NextIndex ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  current 
)

Definition at line 19 of file Reduce.cpp.

20 {
21  unsigned int carry = 1;
22 
23  for (unsigned int idx = numDims; idx-- > 0; )
24  {
25  unsigned int current_val = current[idx] + carry;
26  if (dims[idx] == current_val)
27  {
28  current[idx] = 0;
29  }
30  else
31  {
32  current[idx] = current_val;
33  carry = 0;
34  break;
35  }
36  }
37  return (carry == 0);
38 }

Referenced by Reduce().
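A minimal sketch of the odometer-style iteration this helper provides (its declaring header is an internal reference-backend detail, so the path below is an assumption): starting from an all-zero index, each call advances current to the next multi-dimensional index of dims and returns false once every index has been visited.

#include <vector>
#include <armnn/Tensor.hpp>
#include "Reduce.hpp" // assumed internal header declaring armnn::NextIndex

void VisitAllIndices()
{
    armnn::TensorShape dims({ 2, 3 });
    std::vector<unsigned int> current(2, 0); // start at (0, 0)

    do
    {
        // ... process the element at index 'current' here ...
    }
    while (armnn::NextIndex(2, dims, current)); // visits (0,0) .. (1,2), then returns false
}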

◆ NonMaxSuppression()

std::vector< unsigned int > NonMaxSuppression ( unsigned int  numBoxes,
const std::vector< float > &  boxCorners,
const std::vector< float > &  scores,
float  nmsScoreThreshold,
unsigned int  maxDetection,
float  nmsIouThreshold 
)

Definition at line 50 of file DetectionPostProcess.cpp.

56 {
57  // Select boxes that have scores above a given threshold.
58  std::vector<float> scoresAboveThreshold;
59  std::vector<unsigned int> indicesAboveThreshold;
60  for (unsigned int i = 0; i < numBoxes; ++i)
61  {
62  if (scores[i] >= nmsScoreThreshold)
63  {
64  scoresAboveThreshold.push_back(scores[i]);
65  indicesAboveThreshold.push_back(i);
66  }
67  }
68 
69  // Sort the indices based on scores.
70  unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
71  std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
72  TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
73 
74  // The number of outputs cannot be more than the max detections specified in the options.
75  unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
76  std::vector<unsigned int> outputIndices;
77  std::vector<bool> visited(numAboveThreshold, false);
78 
79  // Prune out the boxes with high intersection over union by keeping the box with higher score.
80  for (unsigned int i = 0; i < numAboveThreshold; ++i)
81  {
82  if (outputIndices.size() >= numOutput)
83  {
84  break;
85  }
86  if (!visited[sortedIndices[i]])
87  {
88  outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
89  for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
90  {
91  unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
92  unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
93  if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
94  {
95  visited[sortedIndices[j]] = true;
96  }
97  }
98  }
99  }
100  return outputIndices;
101 }

References GenerateRangeK(), IntersectionOverUnion(), and TopKSort().

Referenced by DetectionPostProcess().
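A minimal sketch (the box layout is assumed to be the flattened [yMin, xMin, yMax, xMax] per box used by DetectionPostProcess, and the header path is also an assumption): two heavily overlapping boxes, of which NMS keeps only the higher-scoring one.

#include <vector>
#include "DetectionPostProcess.hpp" // assumed internal header declaring armnn::NonMaxSuppression

std::vector<unsigned int> NmsExample()
{
    std::vector<float> boxCorners = { 0.0f, 0.0f, 1.0f, 1.0f,    // box 0
                                      0.0f, 0.1f, 1.0f, 1.1f };  // box 1, IoU with box 0 is roughly 0.82
    std::vector<float> scores = { 0.9f, 0.75f };

    // With an IoU threshold of 0.5, box 1 is suppressed and only index 0 is returned.
    return armnn::NonMaxSuppression(2,          // numBoxes
                                    boxCorners,
                                    scores,
                                    0.5f,       // nmsScoreThreshold
                                    10,         // maxDetection
                                    0.5f);      // nmsIouThreshold
}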

◆ numeric_cast() [1/9]

std::enable_if_t< std::is_unsigned<Source>::value && std::is_unsigned<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 35 of file NumericCast.hpp.

36 {
37 #if ENABLE_NUMERIC_CAST_CHECKS
38  if (source > std::numeric_limits<Dest>::max())
39  {
40  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to "
41  "narrower unsigned type. Overflow detected.");
42  }
43 #endif // ENABLE_NUMERIC_CAST_CHECKS
44 
45  return static_cast<Dest>(source);
46 }

References ARMNN_NUMERIC_CAST_CHECK.
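A minimal usage sketch that applies to the whole numeric_cast family below: the include path is the public ArmNN utility header, and the behaviour of a failed check depends on how ARMNN_NUMERIC_CAST_CHECK is configured in the build.

#include <armnn/utility/NumericCast.hpp>
#include <cstddef>
#include <cstdint>

void NumericCastExamples()
{
    std::size_t small = 42;
    uint8_t fits = armnn::numeric_cast<uint8_t>(small); // fine: 42 fits in uint8_t
    (void)fits;

    std::size_t large = 300;
    // With ENABLE_NUMERIC_CAST_CHECKS defined, the next line would trip
    // ARMNN_NUMERIC_CAST_CHECK (overflow: 300 does not fit in uint8_t);
    // without the checks it would silently truncate, so it is left commented out.
    // uint8_t truncated = armnn::numeric_cast<uint8_t>(large);
    (void)large;
}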

◆ numeric_cast() [2/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 58 of file NumericCast.hpp.

59 {
60 #if ENABLE_NUMERIC_CAST_CHECKS
61  if (source > std::numeric_limits<Dest>::max())
62  {
63  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
64  "Overflow detected.");
65  }
66 
67  if (source < std::numeric_limits<Dest>::lowest())
68  {
69  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
70  "Underflow detected.");
71  }
72 #endif // ENABLE_NUMERIC_CAST_CHECKS
73 
74  return static_cast<Dest>(source);
75 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [3/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 83 of file NumericCast.hpp.

84 {
85 #if ENABLE_NUMERIC_CAST_CHECKS
86  if (source > std::numeric_limits<Dest>::max())
87  {
88  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
89  "Overflow detected.");
90  }
91 
92  if (source < std::numeric_limits<Dest>::lowest())
93  {
94  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
95  "Underflow detected.");
96  }
97 #endif // ENABLE_NUMERIC_CAST_CHECKS
98 
99  return static_cast<Dest>(source);
100 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [4/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 109 of file NumericCast.hpp.

110 {
111 #if ENABLE_NUMERIC_CAST_CHECKS
112  if (source > static_cast<Source>(std::numeric_limits<Dest>::max()))
113  {
114  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
115  "Overflow detected.");
116  }
117 
118  if (source < static_cast<Source>(std::numeric_limits<Dest>::lowest()))
119  {
120  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
121  "Underflow detected.");
122  }
123 #endif // ENABLE_NUMERIC_CAST_CHECKS
124 
125  return static_cast<Dest>(source);
126 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [5/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 135 of file NumericCast.hpp.

136 {
137 #if ENABLE_NUMERIC_CAST_CHECKS
138  Dest sourceConverted = static_cast<Dest>(source);
139 
140  if (sourceConverted > std::numeric_limits<Dest>::max())
141  {
142  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
143  "Overflow detected.");
144  }
145 
146  if (sourceConverted < std::numeric_limits<Dest>::lowest())
147  {
148  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
149  "Underflow detected.");
150  }
151 #endif // ENABLE_NUMERIC_CAST_CHECKS
152 
153  return static_cast<Dest>(source);
154 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [6/9]

std::enable_if_t< std::is_signed<Dest>::value && std::is_integral<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 165 of file NumericCast.hpp.

166 {
167 #if ENABLE_NUMERIC_CAST_CHECKS
168  if (sValue > static_cast< typename std::make_unsigned<Dest>::type >(std::numeric_limits<Dest>::max()))
169  {
170  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to signed type. "
171  "Overflow detected.");
172  }
173 #endif // ENABLE_NUMERIC_CAST_CHECKS
174 
175  return static_cast<Dest>(sValue);
176 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [7/9]

std::enable_if_t< std::is_floating_point<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 184 of file NumericCast.hpp.

185 {
186 #if ENABLE_NUMERIC_CAST_CHECKS
187  if (static_cast<Dest>(sValue) > std::numeric_limits<Dest>::max())
188  {
189  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to floating point type. "
190  "Overflow detected.");
191  }
192 #endif // ENABLE_NUMERIC_CAST_CHECKS
193 
194  return static_cast<Dest>(sValue);
195 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [8/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_signed<Source>::value && std::is_integral<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 206 of file NumericCast.hpp.

207 {
208 #if ENABLE_NUMERIC_CAST_CHECKS
209  if (sValue < 0)
210  {
211  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
212  "Underflow detected.");
213  }
214 
215  if (static_cast< typename std::make_unsigned<Source>::type >(sValue) > std::numeric_limits<Dest>::max())
216  {
217  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to unsigned type. "
218  "Overflow detected.");
219  }
220 #endif // ENABLE_NUMERIC_CAST_CHECKS
221  return static_cast<Dest>(sValue);
222 }

References ARMNN_NUMERIC_CAST_CHECK.

◆ numeric_cast() [9/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_floating_point<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 230 of file NumericCast.hpp.

231 {
232 #if ENABLE_NUMERIC_CAST_CHECKS
233  if (sValue < 0)
234  {
235  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
236  "Underflow detected.");
237  }
238 
239  if (sValue > static_cast<Source>(std::numeric_limits<Dest>::max()))
240  {
241  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to unsigned type. "
242  "Overflow detected.");
243  }
244 #endif // ENABLE_NUMERIC_CAST_CHECKS
245  return static_cast<Dest>(sValue);
246 }

References ARMNN_NUMERIC_CAST_CHECK.
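
A minimal usage sketch (assuming the public include path armnn/utility/NumericCast.hpp for the NumericCast.hpp file above): the correct overload is selected purely from the signedness and integral/floating-point traits of Source and Dest, so the same call syntax covers all nine cases.

#include <armnn/utility/NumericCast.hpp>

#include <cstddef>
#include <vector>

int main()
{
    std::vector<float> data(42);

    // size_t -> int uses the unsigned-to-signed overload [6/9]; with
    // ENABLE_NUMERIC_CAST_CHECKS defined, an overflow would be reported.
    const int count = armnn::numeric_cast<int>(data.size());

    // int -> unsigned int uses the signed-to-unsigned overload [8/9]; a
    // negative source would be reported as underflow instead of wrapping.
    const unsigned int elements = armnn::numeric_cast<unsigned int>(count);

    return elements == 42u ? 0 : 1;
}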

◆ Offset()

unsigned int armnn::Offset ( const TensorShape &  shape,
unsigned int  batch,
unsigned int  height,
unsigned int  width,
unsigned int  channels,
const DataLayoutIndexed &  dataLayout 
)

Definition at line 15 of file BatchToSpaceNd.cpp.

21 {
22  // 3D Tensors
23  unsigned int channelDimension3D = dataLayout.GetDataLayout() == DataLayout::NCHW ? 1 : 2;
24  if (shape.GetNumDimensions() == 3)
25  {
26  return (batch * shape[dataLayout.GetHeightIndex()] + height) * shape[channelDimension3D] + channels;
27  }
28  // 4D Tensors
29  else if (shape.GetNumDimensions() == 4)
30  {
31  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
32  {
33  return ((batch * shape[dataLayout.GetHeightIndex()] + height) *
34  shape[dataLayout.GetWidthIndex()] + width) *
35  shape[dataLayout.GetChannelsIndex()] + channels;
36  }
37  else
38  {
39  return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
40  shape[dataLayout.GetHeightIndex()] + height) *
41  shape[dataLayout.GetWidthIndex()] + width;
42  }
43  }
44  else
45  {
46  throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
47  }
48 }

References CHECK_LOCATION, DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), and DataLayoutIndexed::GetWidthIndex().

Referenced by BatchToSpaceNd().
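
A standalone arithmetic check of the NHWC flattening used above; OffsetNhwc is a hypothetical helper written for illustration, not the armnn::Offset declaration itself.

#include <array>
#include <cassert>

// Flatten (n, h, w, c) into a linear index for a [N, H, W, C] shape.
unsigned int OffsetNhwc(const std::array<unsigned int, 4>& shape,
                        unsigned int n, unsigned int h, unsigned int w, unsigned int c)
{
    return ((n * shape[1] + h) * shape[2] + w) * shape[3] + c;
}

int main()
{
    const std::array<unsigned int, 4> shape = {2, 3, 4, 5}; // N, H, W, C
    // Element (n=1, h=2, w=3, c=4) lands at ((1*3+2)*4+3)*5+4 = 119.
    assert(OffsetNhwc(shape, 1, 2, 3, 4) == 119);
    return 0;
}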

◆ operator<<() [1/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const armnn::TensorShape &  shape 
)
inline

Definition at line 335 of file TypesUtils.hpp.

336 {
337  os << "[";
338  if (shape.GetDimensionality() != Dimensionality::NotSpecified)
339  {
340  for (uint32_t i = 0; i < shape.GetNumDimensions(); ++i)
341  {
342  if (i != 0)
343  {
344  os << ",";
345  }
346  if (shape.GetDimensionSpecificity(i))
347  {
348  os << shape[i];
349  }
350  else
351  {
352  os << "?";
353  }
354  }
355  }
356  else
357  {
358  os << "Dimensionality Not Specified";
359  }
360  os << "]";
361  return os;
362 }

References TensorShape::GetDimensionality(), TensorShape::GetDimensionSpecificity(), TensorShape::GetNumDimensions(), and NotSpecified.
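
A short usage sketch (assuming armnn/Tensor.hpp for TensorShape and armnn/TypesUtils.hpp for this operator):

#include <armnn/Tensor.hpp>
#include <armnn/TypesUtils.hpp>

#include <iostream>

int main()
{
    const armnn::TensorShape shape({1, 2, 3, 4});
    std::cout << shape << std::endl;   // prints [1,2,3,4]
    return 0;
}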

◆ operator<<() [2/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendId &  id 
)
inline

Definition at line 176 of file BackendId.hpp.

177 {
178  os << id.Get();
179  return os;
180 }

◆ operator<<() [3/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendVersion &  backendVersion 
)
inline

Definition at line 70 of file IBackendInternal.hpp.

71 {
72  os << "[" << backendVersion.m_Major << "." << backendVersion.m_Minor << "]";
73 
74  return os;
75 }

References BackendVersion::m_Major, and BackendVersion::m_Minor.

◆ operator<<() [4/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BFloat16 &  b 
)
inline

Definition at line 122 of file BFloat16.hpp.

123 {
124  os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
125  return os;
126 }

References BFloat16::ToFloat32(), and BFloat16::Val().

◆ operator<<() [5/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const Compute &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 69 of file BackendId.hpp.

70 {
71  os << GetComputeDeviceAsCString(compute);
72  return os;
73 }

References GetComputeDeviceAsCString().

◆ operator<<() [6/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::set< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 58 of file BackendId.hpp.

59 {
60  for (const Compute& comp : compute)
61  {
62  os << GetComputeDeviceAsCString(comp) << " ";
63  }
64  return os;
65 }

References GetComputeDeviceAsCString().

◆ operator<<() [7/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::vector< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 47 of file BackendId.hpp.

48 {
49  for (const Compute& comp : compute)
50  {
51  os << GetComputeDeviceAsCString(comp) << " ";
52  }
53  return os;
54 }

References GetComputeDeviceAsCString().

◆ operator<<() [8/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const TContainer< BackendId, TContainerTemplateArgs... > &  ids 
)

Definition at line 183 of file BackendId.hpp.

185 {
186  os << '[';
187  for (const auto& id : ids) { os << id << " "; }
188  os << ']';
189  return os;
190 }

◆ operator<<() [9/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
Status  stat 
)
inline

Definition at line 328 of file TypesUtils.hpp.

329 {
330  os << GetStatusAsCString(stat);
331  return os;
332 }

References GetStatusAsCString().

◆ Optimize() [1/4]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Accept legacy OptimizerOptions.

Definition at line 1883 of file Network.cpp.

1888 {
1889  return Optimize(inGraph,
1890  backendPreferences,
1891  deviceSpec,
1892  OptimizerOptionsOpaque(options),
1893  messages);
1894 }

References Optimize().

◆ Optimize() [2/4]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptionsOpaque &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
inGraph - Graph to be optimized.
backendPreferences - The choice of the backend ordered by user preferences.
deviceSpec - DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec().
options - OptimizerOptions object with optimizer configuration options.
messages - If there are failures or warnings, a string describing them will be added to the vector.
Returns
An IOptimizedNetworkPtr interface to the optimized network; throws an exception derived from armnn::Exception if the process fails.

Definition at line 1896 of file Network.cpp.

1901 {
1902  ARMNN_LOG(debug) << options.ToString();
1903 
1904  // Enable profiling
1905  auto profiler = inGraph.GetProfiler();
1906  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
1907  profiler->EnableProfiling(options.GetProfilingEnabled());
1908 
1909  // Some backends don't play well together. Check here before continuing.
1910  {
1911  std::set<BackendId> backendSet(backendPreferences.begin(), backendPreferences.end());
1912  // GpuFsa cannot co-exist with GpuAcc.
1913  if (backendSet.find("GpuFsa") != backendSet.end() &&
1914  backendSet.find("GpuAcc") != backendSet.end())
1915  {
1916  throw InvalidArgumentException("The backends \"GpuAcc\" and \"GpuFsa\" cannot be specified "
1917  "for the same optimized network.");
1918  }
1919  }
1920 
1921  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
1922  if (backendPreferences.empty())
1923  {
1924  throw InvalidArgumentException("Invoked Optimize with no backends specified");
1925  }
1926 
1927  if (options.GetReduceFp32ToBf16())
1928  {
1929  throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
1930  "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
1931  }
1932 
1933  if (options.GetReduceFp32ToFp16() && options.GetReduceFp32ToBf16())
1934  {
1935  throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1936  }
1937 
1938  // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
1939  inGraph.VerifyConstantLayerSetTensorInfo();
1940 
1941  std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
1942 
1943  // We need to pass on the information about whether import and export is enabled to the LoadNetwork phase.
1944  // The mechanism to do that is to add model options to the optimized network.
1945  armnn::BackendOptions importExport("Global",
1946  {{"ImportEnabled", options.GetImportEnabled()},
1947  {"ExportEnabled", options.GetExportEnabled()}});
1948  ModelOptions optimizedOptions(options.GetModelOptions());
1949  optimizedOptions.push_back(importExport);
1950 
1951  auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
1952  &IOptimizedNetwork::Destroy);
1953 
1954  IOptimizedNetwork* optNetObjPtr = optNet.get();
1955 
1956  // Get the optimized graph
1957  Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
1958 
1959  if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::InferAndValidate)
1960  {
1961  // Infer the tensor infos for all output slots. Throws an exception on failure
1962  optGraph.InferTensorInfos();
1963  }
1964 
1965  // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation
1966  using namespace optimizations;
1967  Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
1968 
1969  Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
1970 
1971  if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::ValidateOnly)
1972  {
1973  // Validate the tensor infos for all output slots. Throws an exception on failure
1974  optGraph.InferTensorInfos();
1975  }
1976 
1977 
1978  // Group Constant Layer optimizations together where possible.
1979  // This is important as:
1980  // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
1981  // FuseBatchNormIntoDepthwiseConvolution2D.
1982  // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d
 1983  Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
       // ... line 1984: remaining pass arguments not reproduced in this listing ...
 1985  // Perform optimisation passes
 1986  Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
       // ... lines 1987-1990: further passes not reproduced in this listing ...
 1991  MovePermuteUp(),
 1992  MoveTransposeUp(),
 1993  PermuteAsReshape(),
       // ... lines 1994-2005: remaining passes and closing parenthesis not reproduced in this listing ...
 2006 
2007  // Initialize backend settings
2008  BackendSettings backendSettings(backendPreferences, deviceSpec);
2009  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
2010  if (availablePreferredBackends.empty())
2011  {
2012  std::stringstream failureMsg;
2013  failureMsg << "None of the preferred backends " << backendPreferences
2014  << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
2015  ReportError(failureMsg.str(), messages);
2016  throw InvalidArgumentException(failureMsg.str());
2017  }
2018 
2019  // Create a map to temporarily hold initialized backend objects
2020  TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
2021  BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
2022 
2023  if (options.GetReduceFp32ToFp16())
2024  {
2025  bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
2026  if (hasFp16)
2027  {
2028  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
2029  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
2030  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2031  }
2032  }
2033 
2034  // Assign an available backend to each layer
2035  Graph::Iterator firstLayer = optGraph.begin();
2036  Graph::Iterator lastLayer = optGraph.end();
2037  OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
2038  backendSettings,
2039  firstLayer,
2040  lastLayer,
2041  messages);
2042  if (assignBackendsResult.m_Error)
2043  {
2044  // Failed to assign a backend to each layer
2045  throw InvalidArgumentException("Failed to assign a backend to each layer");
2046  }
2047 
 2048  Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
       // ... line 2049: remaining pass argument not reproduced in this listing ...
 2050 
2051  // Apply the backend-specific optimizations
2052  OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
2053  backendSettings,
2054  backends,
2055  options.GetModelOptions(),
2056  messages);
2057  if (backendOptimizationResult.m_Error)
2058  {
2059  // Failed to apply the backend-specific optimizations
2060  throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
2061  }
2062 
2063  // Convert constants
2064  {
2065  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
2066  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2067  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
2068  }
2069 
2070  // This must occur after all topological changes to the graph and any redirection of variables
2071  // If the debug flag is set, then insert a DebugLayer after each layer
2072  // Doing this after applying the backend optimizations as they might have changed some layers
2073  if (options.GetDebugEnabled() && !options.GetDebugToFileEnabled())
2074  {
2075  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
2076  }
2077  else if (options.GetDebugToFileEnabled())
2078  {
2079  // Setup the output file path
2080  try
2081  {
2082 #if !defined(ARMNN_DISABLE_FILESYSTEM)
2083  auto result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
2084  ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
2085 #endif
2086  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
2087  }
2088  catch (const armnn::RuntimeException& e)
2089  {
2090  // If we cannot create the output directory then we'll issue a warning and continue.
2091  ARMNN_LOG(warning) << "Unable to print intermediate layer outputs : " << e.what();
2092  }
2093  }
2094 
2095  // Calculate the compatibility strategies for tensor handles
2096  OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
2097  backends,
2098  tensorHandleFactoryRegistry,
2099  options.GetImportEnabled(),
2100  options.GetExportEnabled(),
2101  messages);
2102 
2103  if (strategyResult.m_Error)
2104  {
2105  // Failed to apply the backend-specific optimizations
2106  return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
2107  }
2108 
2109  // Based on the tensor handle strategy determined above, insert copy layers where required.
2110  {
2111  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
2112  optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
2113  }
2114 
2115  return optNet;
2116 }

References Graph::AddCompatibilityLayers(), ApplyBackendOptimizations(), ARMNN_LOG, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), Graph::begin(), CheckFp16Support(), armnnUtils::Filesystem::CreateDirectory(), CreateSupportedBackends(), debug, IOptimizedNetwork::Destroy(), Graph::end(), BackendSettings::GetAvailablePreferredBackends(), OptimizerOptionsOpaque::GetDebugEnabled(), OptimizerOptionsOpaque::GetDebugToFileEnabled(), OptimizerOptionsOpaque::GetExportEnabled(), OptimizerOptionsOpaque::GetImportEnabled(), ProfilerManager::GetInstance(), OptimizerOptionsOpaque::GetModelOptions(), Graph::GetProfiler(), OptimizerOptionsOpaque::GetProfilingEnabled(), OptimizerOptionsOpaque::GetReduceFp32ToBf16(), OptimizerOptionsOpaque::GetReduceFp32ToFp16(), OptimizerOptionsOpaque::GetShapeInferenceMethod(), InferAndValidate, Graph::InferTensorInfos(), info, OptimizationResult::m_Error, BackendSettings::m_SupportedBackends, MakeOptimizations(), Optimizer::Pass(), IOptimizedNetwork::pOptimizedNetworkImpl, ProfilerManager::RegisterProfiler(), ReportError(), SelectTensorHandleStrategy(), OptimizerOptionsOpaque::ToString(), Undefined, ValidateOnly, Graph::VerifyConstantLayerSetTensorInfo(), warning, and Exception::what().

◆ Optimize() [3/4]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Accept legacy OptimizerOptions.

Definition at line 2119 of file Network.cpp.

2124 {
2125  return Optimize(inNetwork,
2126  backendPreferences,
2127  deviceSpec,
2128  OptimizerOptionsOpaque(options),
2129  messages);
2130 }

References Optimize().

◆ Optimize() [4/4]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptionsOpaque &  options = OptimizerOptionsOpaque(),
Optional< std::vector< std::string > & >  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
network - INetwork description of the network to be optimized.
backendPreferences - The choice of the backend ordered by user preferences.
deviceSpec - DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec().
options - OptimizerOptions object with optimizer configuration options.
messages - If there are failures or warnings, a string describing them will be added to the vector.
Returns
An IOptimizedNetworkPtr interface to the optimized network; throws an exception derived from armnn::Exception if the process fails.
Examples
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, DynamicSample.cpp, and SimpleSample.cpp.

Definition at line 2132 of file Network.cpp.

2137 {
2138  return Optimize(inNetwork.pNetworkImpl->GetGraph(),
2139  backendPreferences,
2140  deviceSpec,
2141  options,
2142  messages);
2143 }

References INetwork::pNetworkImpl.

Referenced by Optimize(), ArmnnDriverImpl::PrepareArmnnModel(), ArmnnDriverImpl::PrepareArmnnModelFromCache(), ParserPrototxtFixture< TParser >::Setup(), and ParserPrototxtFixture< TParser >::SetupOptimizedNetwork().
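
A minimal sketch of a typical call, assuming a populated INetwork and a created IRuntime; OptimizeForCpuRef is a hypothetical helper and error handling is omitted.

#include <armnn/ArmNN.hpp>

#include <string>
#include <vector>

// Hypothetical helper: optimize a built network for the CpuRef backend.
armnn::IOptimizedNetworkPtr OptimizeForCpuRef(const armnn::INetwork& network,
                                              armnn::IRuntime& runtime)
{
    const std::vector<armnn::BackendId> preferences = { armnn::Compute::CpuRef };
    armnn::OptimizerOptionsOpaque options;   // defaults: no FP16 reduction, no debug

    std::vector<std::string> messages;       // collects warnings/errors, if any
    return armnn::Optimize(network,
                           preferences,
                           runtime.GetDeviceSpec(),
                           options,
                           armnn::Optional<std::vector<std::string>&>(messages));
}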

◆ Pad()

void Pad ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const ITensorHandle *  inputHandle,
ITensorHandle *  outputHandle,
const PadQueueDescriptor &  data 
)

Definition at line 39 of file Pad.cpp.

44 {
45  auto padList = data.m_Parameters.m_PadList;
46  auto padValue = data.m_Parameters.m_PadValue;
47 
48  unsigned int numOutputElements = outputInfo.GetNumElements();
49 
50  TensorShape outputShape = outputInfo.GetShape();
51  TensorShape inputShape = inputInfo.GetShape();
52 
53  unsigned int numInputDimensions = inputShape.GetNumDimensions();
54 
55 #ifndef NDEBUG
56 
57  unsigned int numOutputDimensions = outputShape.GetNumDimensions();
58  assert(numInputDimensions == numOutputDimensions);
59 
60 #endif
61 
62  unsigned int inputBatches = 0;
63  unsigned int inputChannels = 0;
64  unsigned int inputHeight = 0;
65  unsigned int inputWidth = 0;
66 
67  unsigned int outputChannels = 0;
68  unsigned int outputHeight = 0;
69  unsigned int outputWidth = 0;
70 
71  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
72  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
73 
74  // Fill the output tensor with Pad value first
75  if (outputInfo.IsQuantized())
76  {
77  // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
78  auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
79  auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
80  FillOutputWithPadValue(*outputData, padValue, numOutputElements);
81  }
82  else
83  {
84  FillOutputWithPadValue(*outData, padValue, numOutputElements);
85  }
86 
87  Decoder<float>& input = *inputData;
88  Encoder<float>& output = *outData;
89 
90  switch(numInputDimensions) {
91 
92  case 1:
93  inputWidth = inputShape[0];
94  for (unsigned int w = 0; w < inputWidth ; w++)
95  {
96  input[w];
97  auto inputValue = input.Get();
98  auto outputIndex = w + std::get<0>(padList[0]);
99  output[outputIndex];
100  output.Set(inputValue);
101  }
102 
103  break;
104  case 2 :
105  inputHeight = inputShape[0];
106  inputWidth = inputShape[1];
107  outputWidth = outputShape[1];
108 
109  for (unsigned int h = 0; h < inputHeight; h++)
110  {
111  for (unsigned int w = 0; w < inputWidth ; w++)
112  {
113  input[h * inputWidth + w];
114  auto inputValue = input.Get();
115  auto outputIndex = (h + std::get<0>(padList[0])) * outputWidth + (w + std::get<0>(padList[1]));
116  output[outputIndex];
117  output.Set(inputValue);
118  }
119  }
120 
121  break;
122  case 3 :
123  inputChannels = inputShape[0];
124  inputHeight = inputShape[1];
125  inputWidth = inputShape[2];
126  outputHeight = outputShape[1];
127  outputWidth = outputShape[2];
128 
129  for (unsigned int c = 0; c < inputChannels; c++)
130  {
131  for (unsigned int h = 0; h < inputHeight; h++)
132  {
133  for (unsigned int w = 0; w < inputWidth ; w++)
134  {
135  input[c * inputHeight * inputWidth + h * inputWidth + w];
136  auto inputValue = input.Get();
137  auto outputIndex = (c + std::get<0>(padList[0])) * outputHeight * outputWidth
138  + (h + std::get<0>(padList[1])) * outputWidth
139  + (w + std::get<0>(padList[2]));
140  output[outputIndex];
141  output.Set(inputValue);
142  }
143  }
144  }
145 
146  break;
147  case 4 :
148  inputBatches = inputShape[0];
149  inputChannels = inputShape[1];
150  inputHeight = inputShape[2];
151  inputWidth = inputShape[3];
152  outputChannels = outputShape[1];
153  outputHeight = outputShape[2];
154  outputWidth = outputShape[3];
155 
156  for (unsigned int b = 0; b < inputBatches; b++)
157  {
158  for (unsigned int c = 0; c < inputChannels; c++)
159  {
160  for (unsigned int h = 0; h < inputHeight; h++)
161  {
162  for (unsigned int w = 0; w < inputWidth ; w++)
163  {
164  input[b * inputChannels * inputHeight * inputWidth
165  + c * inputHeight * inputWidth
166  + h * inputWidth
167  + w];
168  auto inputValue = input.Get();
169  auto outputIndex = (b + std::get<0>(padList[0]))
170  * outputChannels * outputHeight * outputWidth
171  + (c + std::get<0>(padList[1])) * outputHeight * outputWidth
172  + (h + std::get<0>(padList[2])) * outputWidth
173  + (w + std::get<0>(padList[3]));
174  output[outputIndex];
175  output.Set(inputValue);
176  }
177  }
178  }
179  }
180 
181  break;
182  default :
183  break;
184  }
185 }

References Decoder< IType >::Get(), TensorInfo::GetDataType(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), TensorInfo::IsQuantized(), PadDescriptor::m_PadList, PadDescriptor::m_PadValue, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), and Encoder< IType >::Set().
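
For context, a sketch of the PadDescriptor that drives the padding above (armnn/Descriptors.hpp assumed): one (before, after) pair per input dimension, plus the fill value.

#include <armnn/Descriptors.hpp>

armnn::PadDescriptor MakePadDescriptor()
{
    armnn::PadDescriptor desc;
    desc.m_PadList  = { {1, 1}, {2, 2} };  // pad a 2D tensor by 1 row and 2 columns on each side
    desc.m_PadValue = 0.0f;                // value written outside the copied region
    return desc;
}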

◆ ParseBooleanBackendOption()

bool armnn::ParseBooleanBackendOption ( const armnn::BackendOptions::Var &  value,
bool  defaultValue 
)
inline

Definition at line 312 of file BackendOptions.hpp.

313 {
314  if (value.IsBool())
315  {
316  return value.AsBool();
317  }
318  return defaultValue;
319 }

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

◆ ParseComputeDevice()

constexpr armnn::Compute armnn::ParseComputeDevice ( const char *  str)
constexpr

Deprecated function that will be removed together with the Compute enum.

Definition at line 213 of file TypesUtils.hpp.

214 {
215  if (armnn::StrEqual(str, "CpuAcc"))
216  {
217  return armnn::Compute::CpuAcc;
218  }
219  else if (armnn::StrEqual(str, "CpuRef"))
220  {
221  return armnn::Compute::CpuRef;
222  }
223  else if (armnn::StrEqual(str, "GpuAcc"))
224  {
225  return armnn::Compute::GpuAcc;
226  }
227  else
228  {
230  }
231 }

References CpuAcc, CpuRef, GpuAcc, StrEqual(), and Undefined.
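
Because the function is constexpr, its mapping can be checked at compile time; a small sketch assuming the armnn/TypesUtils.hpp header above:

#include <armnn/TypesUtils.hpp>

static_assert(armnn::ParseComputeDevice("GpuAcc") == armnn::Compute::GpuAcc,
              "known device names map to their Compute value");
static_assert(armnn::ParseComputeDevice("SomethingElse") == armnn::Compute::Undefined,
              "unknown names fall back to Compute::Undefined");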

◆ ParseIntBackendOption()

int armnn::ParseIntBackendOption ( const armnn::BackendOptions::Var &  value,
int  defaultValue 
)
inline

Definition at line 330 of file BackendOptions.hpp.

331 {
332  if (value.IsInt())
333  {
334  return value.AsInt();
335  }
336  return defaultValue;
337 }

References BackendOptions::Var::AsInt(), and BackendOptions::Var::IsInt().

Referenced by ClBackendModelContext::ClBackendModelContext().

◆ ParseOptions()

void armnn::ParseOptions ( const std::vector< BackendOptions > &  options,
BackendId  backend,
F  f 
)

Definition at line 297 of file BackendOptions.hpp.

298 {
299  for (auto optionsGroup : options)
300  {
301  if (optionsGroup.GetBackendId() == backend)
302  {
303  for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
304  {
305  const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
306  f(option.GetName(), option.GetValue());
307  }
308  }
309  }
310 }

References BackendOptions::BackendOption::GetName(), and BackendOptions::BackendOption::GetValue().

Referenced by ClBackendContext::ClBackendContext(), ClBackendModelContext::ClBackendModelContext(), GpuFsaBackendContext::GpuFsaBackendContext(), NeonBackendModelContext::NeonBackendModelContext(), and RuntimeImpl::RuntimeImpl().
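
A usage sketch, assuming armnn/BackendOptions.hpp and the "FastMathEnabled" option name used by the GpuAcc backend; ReadFastMathFlag is a hypothetical helper in the style of the backend contexts referenced above.

#include <armnn/BackendOptions.hpp>

#include <string>
#include <vector>

// Hypothetical helper: read the GpuAcc "FastMathEnabled" flag, defaulting to false.
bool ReadFastMathFlag(const std::vector<armnn::BackendOptions>& modelOptions)
{
    bool fastMath = false;
    armnn::ParseOptions(modelOptions, "GpuAcc",
                        [&](const std::string& name, const armnn::BackendOptions::Var& value)
                        {
                            if (name == "FastMathEnabled")
                            {
                                fastMath = armnn::ParseBooleanBackendOption(value, fastMath);
                            }
                        });
    return fastMath;
}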

◆ ParseStringBackendOption()

std::string armnn::ParseStringBackendOption ( const armnn::BackendOptions::Var &  value,
std::string  defaultValue 
)
inline

Definition at line 321 of file BackendOptions.hpp.

322 {
323  if (value.IsString())
324  {
325  return value.AsString();
326  }
327  return defaultValue;
328 }

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ ParseTuningLevel()

TuningLevel armnn::ParseTuningLevel ( const BackendOptions::Var &  value,
TuningLevel  defaultValue 
)
inline

Definition at line 26 of file ArmComputeTuningUtils.hpp.

27 {
28  if (value.IsInt())
29  {
30  int v = value.AsInt();
31  if (v > static_cast<int>(TuningLevel::Exhaustive) ||
32  v < static_cast<int>(TuningLevel::None))
33  {
34  ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
35  "Using default(" << static_cast<int>(defaultValue) << ")";
36  } else
37  {
38  return static_cast<TuningLevel>(v);
39  }
40  }
41  return defaultValue;
42 }

References ARMNN_LOG, BackendOptions::Var::AsInt(), Exhaustive, BackendOptions::Var::IsInt(), None, and warning.

Referenced by ClBackendContext::ClBackendContext(), and GpuFsaBackendContext::GpuFsaBackendContext().

◆ PermuteTensor()

armnn::ConstTensor PermuteTensor ( const ConstTensorHandle *  tensor,
const PermutationVector &  permutationVector,
void *  permuteBuffer 
)

Definition at line 18 of file WorkloadUtils.cpp.

20 {
21  if (tensor == nullptr)
22  {
23  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null input tensor pointer");
24  }
25  if (permuteBuffer == nullptr)
26  {
27  throw armnn::InvalidArgumentException("WorkloadUtils: PermuteTensor: Null permute buffer pointer");
28  }
29 
30  TensorInfo tensorInfo = tensor->GetTensorInfo();
31 
32  if (permutationVector.GetSize() > 0)
33  {
34  tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
35  armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
36  tensor->GetConstTensor<void>(), permuteBuffer,
37  GetDataTypeSize(tensorInfo.GetDataType()));
38  }
39  else
40  {
41  ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
42  }
43  tensorInfo.SetConstant(true);
44  return ConstTensor(tensorInfo, permuteBuffer);
45 }

References ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), GetDataTypeSize(), TensorInfo::GetNumBytes(), TensorInfo::GetShape(), PermutationVector::GetSize(), ConstTensorHandle::GetTensorInfo(), armnnUtils::Permute(), armnnUtils::Permuted(), and TensorInfo::SetConstant().

Referenced by Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), and ConvertWeightTensorFromArmnnToAcl().

◆ PolymorphicDowncast()

DestType armnn::PolymorphicDowncast ( SourceType *  value)

Polymorphic downcast for built-in pointers only.

Usage: Child* pChild = PolymorphicDowncast<Child*>(pBase);

Template Parameters
DestType - Pointer type to the target object (Child pointer type)
SourceType - Pointer type to the source object (Base pointer type)
Parameters
value - Pointer to the source object
Returns
Pointer of type DestType (Pointer of type child)

Definition at line 74 of file PolymorphicDowncast.hpp.

75 {
76  static_assert(std::is_pointer<DestType>::value,
77  "PolymorphicDowncast only works with pointer types.");
78 
79  ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == value);
80  return static_cast<DestType>(value);
81 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

Referenced by ClLayerSupport::IsLayerSupported(), and IsLayerTypeSupported().
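
A minimal usage sketch (assuming armnn/utility/PolymorphicDowncast.hpp); Base and Child are illustrative types, not ArmNN classes.

#include <armnn/utility/PolymorphicDowncast.hpp>

struct Base { virtual ~Base() = default; };
struct Child : Base { int m_Value = 42; };

int main()
{
    Child child;
    Base* base = &child;

    // Checked with ARMNN_POLYMORPHIC_CAST_CHECK in debug builds, a plain
    // static_cast otherwise.
    Child* downcast = armnn::PolymorphicDowncast<Child*>(base);
    return downcast->m_Value == 42 ? 0 : 1;
}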

◆ PolymorphicPointerDowncast()

auto armnn::PolymorphicPointerDowncast ( const SourceType &  value)

Polymorphic downcast for shared pointers and built-in pointers.

Usage: auto pChild = PolymorphicPointerDowncast<Child>(pBase)

Template Parameters
DestType - Type of the target object (Child type)
SourceType - Pointer type to the source object (Base (shared) pointer type)
Parameters
value - Pointer to the source object
Returns
Pointer of type DestType ((Shared) pointer of type child)

Definition at line 93 of file PolymorphicDowncast.hpp.

94 {
95  ARMNN_POLYMORPHIC_CAST_CHECK(utility::DynamicPointerCast<DestType>(value)
96  == value);
97  return utility::StaticPointerCast<DestType>(value);
98 }

References ARMNN_POLYMORPHIC_CAST_CHECK.

◆ Pooling2d()

void Pooling2d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling2dDescriptor &  params 
)

Computes the Pooling2d operation.

Definition at line 142 of file Pooling2d.cpp.

147 {
148  const DataLayoutIndexed dataLayout(params.m_DataLayout);
149  auto channelsIndex = dataLayout.GetChannelsIndex();
150  auto heightIndex = dataLayout.GetHeightIndex();
151  auto widthIndex = dataLayout.GetWidthIndex();
152 
153  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
154  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
155  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
156  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
157  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
158  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
159  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
160  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
161  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
162  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
163  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
164  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
165  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
166  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
167 
168  float defaultInitializer = DefaultInitializer(params.m_PoolType);
169 
170  Accumulator accumulate = GetAccumulator(params.m_PoolType);
171  Executor execute = GetExecutor(params.m_PoolType);
172 
173  // Check supported padding methods outside the loop to simplify
174  // the inner loop.
175  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
176  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
177  {
178  throw armnn::InvalidArgumentException("Unsupported padding type");
179  }
180 
181  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
182 
183  for (int n = 0; n < batchSize; n++)
184  {
185  for (int c = 0; c < channels; c++)
186  {
187  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
188  {
189  // Calculate values independent of the x axis
190  int hstart = (yOutput * strideY) - padTop;
191  int hend = hstart + poolHeight;
192  // Clamp the pooling region inside the valid input area (which includes the padding).
193  // This is necessary because the final pooling in a row may overlap beyond the padding.
194  hend = std::min(hend, heightInput + padBottom);
195 
196  int height = hend - hstart;
197  bool hclamped = ClampRange(hstart, hend, heightInput);
198 
199  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
200  {
201  int wstart = (xOutput * strideX) - padLeft;
202  int wend = wstart + poolWidth;
203 
204  // Clamp the pooling region inside the valid input area (which includes the padding).
205  // This is necessary because the final pooling in a row may overlap beyond the padding.
206  wend = std::min(wend, widthInput + padRight);
207 
208  float result = defaultInitializer;
209  float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));
210 
211  // Special case: when the pooling kernel is over a padding region and the padding
212  // size is larger or equal to the kernel and the kernel only covers
213  // padding and no real values, then we initialize the result as zero
214  // by convention. This is because we need to choose a value here and
215  // all values we have are padding, which we ignore.
216  if (OnPaddingOnly(hstart, hend, heightInput) ||
217  OnPaddingOnly(wstart, wend, widthInput))
218  {
219  result = 0.0f;
220 
221  int outputIndex;
222 
223  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
224  {
225  outputIndex = n * heightOutput * widthOutput * channels +
226  yOutput * widthOutput * channels +
227  xOutput * channels +
228  c;
229  }
230  else
231  {
232  outputIndex = n * heightOutput * widthOutput * channels +
233  c * heightOutput * widthOutput +
234  yOutput * widthOutput +
235  xOutput;
236  }
237 
238  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
239  rOutputEncoder.Set(result);
240  continue;
241  }
242 
243  bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);
244 
245  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
246  {
247  // When we exclude the padding, it means we calculate with a smaller
248  // kernel size, so I changed the divisor here.
249  poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
250  }
251 
252  for (auto yInput = hstart; yInput < hend; yInput++)
253  {
254  for (auto xInput = wstart; xInput < wend; xInput++)
255  {
256 
257  int inputIndex;
258  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
259  {
260  inputIndex = n * heightInput * widthInput * channels +
261  yInput * widthInput * channels +
262  xInput * channels +
263  c;
264 
265  }
266  else
267  {
268  inputIndex = n * heightInput * widthInput * channels +
269  c * heightInput * widthInput +
270  yInput * widthInput +
271  xInput;
272  }
273 
274  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
275  }
276  }
277 
278  execute(result, poolAreaSize);
279 
280  int outputIndex;
281 
282  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
283  {
284  outputIndex = n * heightOutput * widthOutput * channels +
285  yOutput * widthOutput * channels +
286  xOutput * channels +
287  c;
288  }
289  else
290  {
291  outputIndex = n * heightOutput * widthOutput * channels +
292  c * heightOutput * widthOutput +
293  yOutput * widthOutput +
294  xOutput;
295  }
296 
297  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
298  rOutputEncoder.Set(result);
299  }
300  }
301  }
302  }
303 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling2dDescriptor::m_DataLayout, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, Pooling2d(), and Encoder< IType >::Set().

Referenced by Pooling2d(), and Pooling2dLayer::Pooling2dLayer().
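
For context, a sketch of the Pooling2dDescriptor consumed above (armnn/Descriptors.hpp assumed): a 2x2 average pool with stride 2, no padding, NHWC layout.

#include <armnn/Descriptors.hpp>

armnn::Pooling2dDescriptor MakeAveragePool2x2()
{
    armnn::Pooling2dDescriptor desc;
    desc.m_PoolType      = armnn::PoolingAlgorithm::Average;
    desc.m_PoolWidth     = 2;
    desc.m_PoolHeight    = 2;
    desc.m_StrideX       = 2;
    desc.m_StrideY       = 2;
    desc.m_PaddingMethod = armnn::PaddingMethod::Exclude;
    desc.m_DataLayout    = armnn::DataLayout::NHWC;
    return desc;
}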

◆ Pooling3d()

void Pooling3d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling3dDescriptor &  params 
)

Computes the Pooling3d operation.

Definition at line 172 of file Pooling3d.cpp.

177 {
178  const DataLayoutIndexed dataLayout(params.m_DataLayout);
179 
180  auto channelsIndex = dataLayout.GetChannelsIndex();
181 
182  auto depthIndex = dataLayout.GetDepthIndex();
183  auto heightIndex = dataLayout.GetHeightIndex();
184  auto widthIndex = dataLayout.GetWidthIndex();
185 
186  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
187  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
188 
189  const int depthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
190  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
191  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
192 
193  const int depthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
194  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
195  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
196 
197  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
198  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
199  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
200  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
201  const int padFront = armnn::numeric_cast<int>(params.m_PadFront);
202  const int padBack = armnn::numeric_cast<int>(params.m_PadBack);
203 
204  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
205  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
206  const int strideZ = armnn::numeric_cast<int>(params.m_StrideZ);
207 
208  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
209  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
210  const int poolDepth = armnn::numeric_cast<int>(params.m_PoolDepth);
211 
212  float defaultInitializer = DefaultInitializer(params.m_PoolType);
213  Accumulator accumulate = GetAccumulator(params.m_PoolType);
214  Executor execute = GetExecutor(params.m_PoolType);
215 
216  // Check supported padding methods outside the loop to simplify
217  // the inner loop.
218  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
219  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
220  {
221  throw armnn::InvalidArgumentException("Unsupported padding type");
222  }
223 
224  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
225 
226  for (int n = 0; n < batchSize; n++)
227  {
228  for (int c = 0; c < channels; c++)
229  {
230  for (int zOutput = 0; zOutput < depthOutput; zOutput++)
231  {
232  // Calculate values independent of the x and y axis
233  int dstart = (zOutput * strideZ) - padFront;
234  int dend = dstart + poolDepth;
235  // Clamp the pooling region inside the valid input area (which includes the padding).
236  // This is necessary because the final pooling in a row may overlap beyond the padding.
237  dend = std::min(dend, depthInput + padBack);
238 
239  int depth = dend - dstart;
240  bool dclamped = ClampRange(dstart, dend, depthInput);
241  int depthClamped = dend - dstart;
242 
243  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
244  {
245  int hstart = (yOutput * strideY) - padTop;
246  int hend = hstart + poolHeight;
247  // Clamp the pooling region inside the valid input area (which includes the padding).
248  // This is necessary because the final pooling in a row may overlap beyond the padding.
249  hend = std::min(hend, heightInput + padBottom);
250 
251  int height = hend - hstart;
252  bool hclamped = ClampRange(hstart, hend, heightInput);
253  int heightClamped = hend - hstart;
254 
255  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
256  {
257  int wstart = (xOutput * strideX) - padLeft;
258  int wend = wstart + poolWidth;
259  // Clamp the pooling region inside the valid input area (which includes the padding).
260  // This is necessary because the final pooling in a row may overlap beyond the padding.
261  wend = std::min(wend, widthInput + padRight);
262 
263  int width = wend - wstart;
264  bool wclamped = ClampRange(wstart, wend, widthInput);
265  int widthClamped = wend - wstart;
266 
267  float result = defaultInitializer;
268  float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
269 
270  // Special case: when the pooling kernel is over a padding region and the padding
271  // size is larger or equal to the kernel and the kernel only covers
272  // padding and no real values, then we initialize the result as zero
273  // by convention. This is because we need to choose a value here and
274  // all values we have are padding, which we ignore.
275  if (OnPaddingOnly(dstart, dend, depthInput) ||
276  OnPaddingOnly(hstart, hend, heightInput) ||
277  OnPaddingOnly(wstart, wend, widthInput))
278  {
279  result = 0.0f;
280 
281  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
282  n, c, zOutput, yOutput, xOutput, dataLayout);
283 
284  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
285  rOutputEncoder.Set(result);
286 
287  continue;
288  }
289 
290  bool clamped = (dclamped | hclamped | wclamped);
291 
292  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
293  {
294  // When we exclude the padding, it means we calculate with a smaller
295  // kernel size, so I changed the divisor here.
296  poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
297  }
298 
299  for (auto zInput = dstart; zInput < dend; zInput++)
300  {
301  for (auto yInput = hstart; yInput < hend; yInput++)
302  {
303  for (auto xInput = wstart; xInput < wend; xInput++)
304  {
305 
306  int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
307  n, c, zInput, yInput, xInput, dataLayout);
308 
309  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
310  }
311  }
312  }
313 
314  execute(result, poolAreaSize);
315 
316  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
317  n, c, zOutput, yOutput, xOutput, dataLayout);
318 
319  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
320  rOutputEncoder.Set(result);
321  }
322  }
323  }
324  }
325  }
326 }

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling3dDescriptor::m_DataLayout, Pooling3dDescriptor::m_PadBack, Pooling3dDescriptor::m_PadBottom, Pooling3dDescriptor::m_PaddingMethod, Pooling3dDescriptor::m_PadFront, Pooling3dDescriptor::m_PadLeft, Pooling3dDescriptor::m_PadRight, Pooling3dDescriptor::m_PadTop, Pooling3dDescriptor::m_PoolDepth, Pooling3dDescriptor::m_PoolHeight, Pooling3dDescriptor::m_PoolType, Pooling3dDescriptor::m_PoolWidth, Pooling3dDescriptor::m_StrideX, Pooling3dDescriptor::m_StrideY, Pooling3dDescriptor::m_StrideZ, Pooling3d(), and Encoder< IType >::Set().

Referenced by Pooling3d(), and Pooling3dLayer::Pooling3dLayer().

◆ PreluImpl()

void PreluImpl ( const TensorInfo &  inputInfo,
const TensorInfo &  alphaInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  inputData,
Decoder< float > &  alphaData,
Encoder< float > &  outputData 
)

Definition at line 13 of file PreluImpl.cpp.

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const TensorShape& alphaShape = alphaInfo.GetShape();
22  const TensorShape& outputShape = outputInfo.GetShape();
23 
24  // PReLU activation: f(x) = alpha * x for x < 0, f(x) = x for x >= 0
25  auto prelu = [](float x, float alpha)
26  {
27  return x < 0 ? alpha * x : x;
28  };
29 
30  BroadcastLoop(inputShape, alphaShape, outputShape).Unroll(prelu, 0, inputData, alphaData, outputData);
31 }

References TensorInfo::GetShape(), and BroadcastLoop::Unroll().
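
A standalone check of the PReLU rule used above; it reproduces the lambda from the listing and does not depend on ArmNN headers.

#include <cassert>

int main()
{
    // Negative inputs are scaled by alpha, non-negative inputs pass through.
    auto prelu = [](float x, float alpha) { return x < 0 ? alpha * x : x; };
    assert(prelu(-2.0f, 0.25f) == -0.5f);
    assert(prelu( 3.0f, 0.25f) ==  3.0f);
    return 0;
}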

◆ PrintOutput()

void armnn::PrintOutput ( const TensorInfo &  inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex,
std::ostream &  os 
)

Definition at line 23 of file Debug.cpp.

29 {
30  const unsigned int numDims = inputInfo.GetNumDimensions();
31  const unsigned int numElements = inputInfo.GetNumElements();
32  const TensorShape& inputShape = inputInfo.GetShape();
33 
34  std::vector<unsigned int> strides(numDims, 0);
35  strides[numDims - 1] = inputShape[numDims - 1];
36 
37  for (unsigned int i = 2; i <= numDims; i++)
38  {
39  strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
40  }
41 
42  os << "{ ";
43  os << "\"layerGuid\": " << guid << ", ";
44  os << "\"layerName\": \"" << layerName << "\", ";
45  os << "\"outputSlot\": " << slotIndex << ", ";
46  os << "\"shape\": ";
47 
48  os << "[";
49  for (unsigned int i = 0; i < numDims; i++)
50  {
51  os << inputShape[i];
52  if (i != numDims - 1)
53  {
54  os << ", ";
55  }
56  }
57  os << "], ";
58 
59  os << "\"min\": "
60  << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
61 
62  os << "\"max\": "
63  << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
64 
65  os << "\"data\": ";
66 
67  for (unsigned int i = 0; i < numElements; i++)
68  {
69  for (unsigned int j = 0; j < numDims; j++)
70  {
71  if (i % strides[j] == 0)
72  {
73  os << "[";
74  }
75  }
76 
77  os << static_cast<float>(inputData[i]);
78 
79  for (unsigned int j = 0; j < numDims; j++)
80  {
81  if ((i + 1) % strides[j] == 0)
82  {
83  os << "]";
84  }
85  }
86 
87  if (i != numElements - 1)
88  {
89  os << ", ";
90  }
91  }
92 
93  os << " }" << std::endl;
94 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), and TensorInfo::GetShape().

◆ ProfilingUpdateDescriptions()

void armnn::ProfilingUpdateDescriptions ( const std::string &  name,
const DescriptorType &  desc,
const WorkloadInfo &  infos,
const arm::pipe::ProfilingGuid  guid 
)
inline


Definition at line 180 of file Profiling.hpp.

184 {
185  IProfiler* profiler(ProfilerManager::GetInstance().GetProfiler()); ///< Profiler used
186  if (profiler && profiler->IsProfilingEnabled())
187  {
188  profiler->AddLayerDetails(name, desc, infos, guid);
189  }
190 }

References ProfilerManager::GetInstance(), and IProfiler::IsProfilingEnabled().

◆ Quantize() [1/2]

template int32_t Quantize< int32_t > ( float  value,
float  scale,
int32_t  offset 
)

Quantize a floating point value into a quantized integer data type.

Explicit specialization of Quantize for int32_t.

Explicit specialization of Quantize for int16_t.

Explicit specialization of Quantize for uint8_t.

Explicit specialization of Quantize for int8_t.

Parameters
value - The value to quantize.
scale - The scale (must be non-zero).
offset - The offset.
Returns
The quantized value, calculated as round(value/scale) + offset and clamped to the range of the target type.

Definition at line 30 of file TypesUtils.cpp.

31 {
32  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
33  constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34  constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
35  if (scale == 0.f)
36  {
37  throw armnn::InvalidArgumentException("Quantize: Scale cannot be 0.f");
38  }
39  if (std::isnan(value))
40  {
41  throw armnn::InvalidArgumentException("Quantize: Value is NaN");
42  }
43 
44  float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
45  static_cast<float>(min)), static_cast<float>(max));
46  auto quantizedBits = static_cast<QuantizedType>(clampedValue);
47 
48  return quantizedBits;
49 }
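
A short usage sketch of the uint8_t instantiation (armnn/TypesUtils.hpp assumed for the declaration; linking against the ArmNN library provides the explicit instantiations):

#include <armnn/TypesUtils.hpp>

#include <cstdint>

int main()
{
    // round(0.5f / 0.1f) + 3 = 5 + 3 = 8, well inside the uint8_t range.
    const uint8_t q = armnn::Quantize<uint8_t>(0.5f, 0.1f, 3);
    return q == 8 ? 0 : 1;
}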

◆ Quantize() [2/2]

void armnn::Quantize ( uint8_t *  quant,
const float *  dequant,
const TensorInfo &  info 
)
inline

Definition at line 121 of file RefWorkloadUtils.hpp.

122 {
123  for (size_t i = 0; i < info.GetNumElements(); i++)
124  {
125  quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
126  }
127 }

References info.

◆ Reduce()

void Reduce ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  input,
Encoder< float > &  output,
const std::vector< uint32_t >  axis,
const ReduceOperation  reduceOperation 
)

Definition at line 70 of file Reduce.cpp.

76 {
77  armnn::TensorShape inputDims = inputInfo.GetShape();
78  unsigned int inputNumDims = inputInfo.GetNumDimensions();
79  unsigned int numOutputs = outputInfo.GetNumElements();
80 
81  // Initialise temp output
82  std::vector<float> tempOut(numOutputs);
83  switch(reduceOperation)
84  {
85  case ReduceOperation::Mean:
86  case ReduceOperation::Sum:
87  std::fill(tempOut.begin(), tempOut.end(), 0.0f);
88  break;
89  case ReduceOperation::Prod:
90  std::fill(tempOut.begin(), tempOut.end(), 1.0f);
91  break;
92  case ReduceOperation::Max:
93  std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
94  break;
95  case ReduceOperation::Min:
96  std::fill(tempOut.begin(), tempOut.end(), std::numeric_limits<float>::max());
97  break;
98  default:
99  throw armnn::InvalidArgumentException("Unknown reduce method: " +
100  std::to_string(static_cast<int>(reduceOperation)));
101  }
102 
103  // Initialise temp index
104  std::vector<unsigned int> tempIndex(inputNumDims, 0);
105 
106  std::vector<unsigned int> resolvedAxis = axis;
107  if (resolvedAxis.empty())
108  {
109  for (unsigned int idx = 0; idx < inputNumDims; ++idx)
110  {
111  resolvedAxis.push_back(idx);
112  }
113  }
114  auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
115 
116  // Iterates through input_data and operates over the reduced axis
117  for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
118  {
119  unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
120  unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
121  numResolvedAxis, resolvedAxis);
122  input[inputOffset];
123  auto inputValue = input.Get();
124  switch(reduceOperation)
125  {
126  case ReduceOperation::Mean:
127  case ReduceOperation::Sum:
128  tempOut[outputOffset] += inputValue;
129  break;
130  case ReduceOperation::Prod:
131  tempOut[outputOffset] *= inputValue;
132  break;
133  case ReduceOperation::Max:
134  if (inputValue > tempOut[outputOffset])
135  {
136  tempOut[outputOffset] = inputValue;
137  }
138  break;
139  case ReduceOperation::Min:
140  if (inputValue < tempOut[outputOffset])
141  {
142  tempOut[outputOffset] = inputValue;
143  }
144  break;
145  default:
146  throw armnn::InvalidArgumentException("Unknown reduce method: " +
147  std::to_string(static_cast<int>(reduceOperation)));
148  }
149  }
150 
151  // Takes average by num of elements added to get MEAN
152  size_t numElementsInAxis = 1;
153  for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
154  {
155  unsigned int current = inputDims[resolvedAxis[idx]];
156  ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
157  (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
158  numElementsInAxis *= current;
159  }
160 
161  for (unsigned int idx = 0; idx < numOutputs; ++idx)
162  {
163  output[idx];
164  if (reduceOperation == ReduceOperation::Mean)
165  {
166  if (numElementsInAxis > 0)
167  {
168  output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
169  }
170  }
171  else
172  {
173  output.Set(tempOut[idx]);
174  }
175  }
176 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), Max, Mean, Min, NextIndex(), Prod, ReducedOutputOffset(), Encoder< IType >::Set(), and Sum.

◆ ReducedOutputOffset()

unsigned int armnn::ReducedOutputOffset ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  index,
const unsigned int  numAxis,
const std::vector< unsigned int > &  axis 
)

Definition at line 40 of file Reduce.cpp.

45 {
46  unsigned int offset = 0;
47  for (unsigned int idx = 0; idx < numDims; ++idx)
48  {
49  bool isAxis = false;
50  if (!axis.empty())
51  {
52  for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
53  {
54  if (idx == axis[axisIdx])
55  {
56  isAxis = true;
57  break;
58  }
59  }
60  }
61  if (!isAxis)
62  {
63  offset = offset * dims[idx] + index[idx];
64  }
65  }
66  return offset;
67 }

Referenced by Reduce().
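
A standalone check of the reduced offset computed above; ReducedOffset is a hypothetical re-implementation written for illustration, not the ArmNN function itself.

#include <cassert>
#include <vector>

// Dimensions listed in "axis" are skipped, so the index collapses onto the
// remaining dimensions of the reduced output.
unsigned int ReducedOffset(const std::vector<unsigned int>& dims,
                           const std::vector<unsigned int>& index,
                           const std::vector<unsigned int>& axis)
{
    unsigned int offset = 0;
    for (unsigned int d = 0; d < dims.size(); ++d)
    {
        bool reduced = false;
        for (unsigned int a : axis) { reduced = reduced || (a == d); }
        if (!reduced) { offset = offset * dims[d] + index[d]; }
    }
    return offset;
}

int main()
{
    // Reducing axis 1 of a [2,3,4] tensor: element (1,2,3) maps to output offset 1*4 + 3 = 7.
    assert(ReducedOffset({2, 3, 4}, {1, 2, 3}, {1}) == 7);
    return 0;
}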

◆ RefBackendId()

constexpr const char* armnn::RefBackendId ( )
constexpr

Definition at line 10 of file RefBackendId.hpp.

10 { return "CpuRef"; }

Referenced by RefBackend::GetIdStatic().

◆ RefTensorHandleFactoryId()

constexpr const char* armnn::RefTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file RefTensorHandleFactory.hpp.

15 { return "Arm/Ref/TensorHandleFactory"; }

Referenced by RefTensorHandleFactory::GetIdStatic().

◆ RemoveReshapeLayer()

void armnn::RemoveReshapeLayer ( ReshapeLayer *  baseLayer,
std::map< LayerGuid, Layer * > &  untouched,
OptimizationViews &  optimizationViews 
)
inline

Definition at line 293 of file SubgraphUtils.hpp.

296 {
297  if (baseLayer == nullptr)
298  {
299  return;
300  }
301  ReshapeDescriptor reshapeDescriptor = baseLayer->GetParameters();
302  Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
303 
304  // Cannot currently remove the Reshape if it's connected to an Input, Constant or Splitter
305  if (parentLayer.GetType() == LayerType::Input || parentLayer.GetType() == LayerType::Constant)
306  {
307  return;
308  }
309 
310  // Cannot currently remove the Reshape if it's connected to an OutputSlot or Concat
311  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
312  {
313  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
314 
315  if (nextLayer.GetType() == LayerType::Output)
316  {
317  return;
318  }
319  }
320  auto it = untouched.find(baseLayer->GetGuid());
321  if (it == untouched.end())
322  {
323  // Already removed from map
324  return;
325  }
326  untouched.erase(it);
327 
328  // Override the InputSlot TensorInfos for all the layers connected to the Reshape's OutputSlot
329  for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i)
330  {
331  Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer();
332  auto inputIndex = baseLayer->GetOutputSlot(0).GetConnection(i)->GetSlotIndex();
333  TensorInfo reshapeInfo(baseLayer->GetOutputSlot(0).GetTensorInfo());
334  reshapeInfo.SetShape(reshapeDescriptor.m_TargetShape);
335  nextLayer.GetInputSlot(inputIndex).SetTensorInfo(reshapeInfo);
336  }
337  optimizationViews.AddDeletedSubgraph(baseLayer);
338 }

References OptimizationViews::AddDeletedSubgraph(), Constant, InputSlot::GetConnectedOutputSlot(), OutputSlot::GetConnection(), Layer::GetGuid(), Layer::GetInputSlot(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), LayerWithParameters< Parameters >::GetParameters(), InputSlot::GetSlotIndex(), OutputSlot::GetTensorInfo(), Layer::GetType(), Input, ReshapeDescriptor::m_TargetShape, Output, TensorInfo::SetShape(), and InputSlot::SetTensorInfo().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

◆ ReorderWeightChannelsForAcl()

ConstTensor armnn::ReorderWeightChannelsForAcl ( const ConstTensor &  weightHandle,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 73 of file WorkloadUtils.cpp.

74 {
75  DataType* weight = static_cast<DataType*>(permuteBuffer);
76  const TensorShape& weightShape = weightHandle.GetShape();
77  unsigned int multiplier;
78  unsigned int height;
79  unsigned int width;
80  unsigned int inputChannels;
81  switch (dataLayout)
82  {
83  case DataLayout::NHWC: //It actually is [ H, W, I, M ]
84  height = weightShape[0];
85  width = weightShape[1];
86  inputChannels = weightShape[2];
87  multiplier = weightShape[3];
88  break;
89  case DataLayout::NCHW: //It actually is [ M, I, H, W ]
90  default:
91  height = weightShape[2];
92  width = weightShape[3];
93  inputChannels = weightShape[1];
94  multiplier = weightShape[0];
95  break;
96  }
97 
98  std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
99  unsigned int destinationWeightsChannel;
100  unsigned int totalChannels = inputChannels * multiplier;
101  unsigned int channelSize = height * width;
102  unsigned int inputChannel = 0;
103 
104  for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
105  {
106  inputChannel = originWeightsChannel % inputChannels;
107  destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;
108 
109  for (unsigned int i = 0; i < channelSize; i++)
110  {
111  weightAclOrder[i + destinationWeightsChannel * channelSize] =
112  weight[i + originWeightsChannel * channelSize];
113  }
114  }
115 
116  ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
117  return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
118 }

References BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetNumBytes(), BaseTensor< MemoryType >::GetShape(), NCHW, and NHWC.
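
The loop above regroups depthwise weights so that all multipliers of one input channel become contiguous, which is the ordering ACL expects. A minimal standalone sketch of the remapping formula (hypothetical I = 2, M = 3; not the library function itself):

#include <cstdio>

int main()
{
    // Hypothetical depthwise weights with 2 input channels (I) and 3 multipliers (M).
    const unsigned int inputChannels = 2;
    const unsigned int multiplier    = 3;
    const unsigned int totalChannels = inputChannels * multiplier;

    // Same remapping as the loop above: the source channels are multiplier-major
    // ([m0,i0], [m0,i1], [m1,i0], ...), the destination groups all multipliers of
    // one input channel together ([i0,m0], [i0,m1], [i0,m2], [i1,m0], ...).
    for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; ++originWeightsChannel)
    {
        const unsigned int inputChannel = originWeightsChannel % inputChannels;
        const unsigned int destinationWeightsChannel =
            (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;

        std::printf("origin %u -> destination %u\n", originWeightsChannel, destinationWeightsChannel);
    }
    return 0;
}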

◆ ReplaceLayers()

void armnn::ReplaceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
std::vector< IConnectableLayer * > &  layers 
)

Definition at line 345 of file ArmComputeSubgraphUtils.hpp.

348 {
349  std::list<IConnectableLayer*> replacementLayers(layers.begin(), layers.end());
350 
351  SubgraphView substitutionSubgraph(baseLayer);
352  SubgraphView replacementSubgraph(std::move(replacementLayers),
353  CreateIInputsFrom({replacementLayers.front()}),
354  CreateIOutputsFrom({replacementLayers.back()}));
355 
356  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
357 }

References OptimizationViews::AddSubstitution().

◆ ReplaceMultipleLayers()

void armnn::ReplaceMultipleLayers ( OptimizationViews &  optimizationViews,
std::vector< IConnectableLayer * > &  originalLayers,
LayerType *  baseLayer,
const std::vector< SlotList >  inputLayersSlotLists,
const std::vector< SlotList >  outputLayersSlotLists 
)

Definition at line 363 of file ArmComputeSubgraphUtils.hpp.

368 {
369  std::list<IConnectableLayer*> originalLayerList(originalLayers.begin(), originalLayers.end());
370 
371  SubgraphView substitutionSubgraph(
372  std::move(originalLayerList),
373  CreateIInputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, inputLayersSlotLists),
374  CreateIOutputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, outputLayersSlotLists));
375  SubgraphView replacementSubgraph(baseLayer);
376 
377  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
378 }

References OptimizationViews::AddSubstitution().

◆ ReportError()

void armnn::ReportError ( const std::string &  errorMessage,
Optional< std::vector< std::string > & >  errorMessages 
)

Definition at line 756 of file Network.cpp.

758 {
759  std::stringstream fullErrorMessage;
760  fullErrorMessage << "ERROR: " << errorMessage;
761  ARMNN_LOG(warning) << fullErrorMessage.str();
762  if (errorMessages)
763  {
764  errorMessages.value().push_back(fullErrorMessage.str());
765  }
766 }

References ARMNN_LOG, and warning.

Referenced by AssignBackends(), CheckScaleSetOnQuantizedType(), Optimize(), and ReturnWithError().

◆ ReportUntouchedLayers()

void armnn::ReportUntouchedLayers ( OptimizationViews &  optimizationViews,
std::map< LayerGuid, Layer * >  untouched 
)
inline

Definition at line 220 of file SubgraphUtils.hpp.

221 {
222  std::vector<Layer*> untouchedVector;
223  for (const auto& pair : untouched)
224  {
225  Layer* layer = pair.second;
226  SubgraphView subgraphView({layer},
227  CreateIInputsFrom({layer}),
228  CreateIOutputsFrom({layer}));
229  optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
230  }
231 }

References OptimizationViews::AddUntouchedSubgraph().

Referenced by RefBackend::OptimizeSubgraphView(), NeonBackend::OptimizeSubgraphView(), ClBackend::OptimizeSubgraphView(), and GpuFsaBackend::OptimizeSubgraphView().

◆ ReportWarning()

void armnn::ReportWarning ( const std::string &  warningMessage,
Optional< std::vector< std::string > & >  warningMessages 
)

Definition at line 768 of file Network.cpp.

770 {
771  std::stringstream fullWarningMessage;
772  fullWarningMessage << "WARNING: " << warningMessage;
773  ARMNN_LOG(warning) << fullWarningMessage.str();
774  if (warningMessages)
775  {
776  warningMessages.value().push_back(fullWarningMessage.str());
777  }
778 }

References ARMNN_LOG, and warning.

Referenced by ApplyBackendOptimizations(), and AttemptBackendAssignment().

◆ RequiresCopy()

bool armnn::RequiresCopy ( ITensorHandleFactory::FactoryId  src,
ITensorHandleFactory::FactoryId  dst,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1458 of file Network.cpp.

1461 {
1462  if (src != dst)
1463  {
1464  ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1465  ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1466 
1467  if (srcFactory && dstFactory &&
1468  (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1469  {
1470  return false;
1471  }
1472  return true;
1473  }
1474  return false;
1475 }

References ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), and ITensorHandleFactory::GetImportFlags().

Referenced by CalculateSlotOption().

◆ ReshapeWeightsForAcl()

void ReshapeWeightsForAcl ( TensorInfo &  weightInfo,
DataLayout  dataLayout 
)

Definition at line 47 of file WorkloadUtils.cpp.

48 {
49  // Reshape the weights in-place
50  const TensorShape& weightShape = weightInfo.GetShape();
51  switch (dataLayout)
52  {
53  case DataLayout::NHWC:
54  // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
55  weightInfo.SetShape({ 1,
56  weightShape[0],
57  weightShape[1],
58  weightShape[2] * weightShape[3] });
59  weightInfo.SetShape({ 1,
60  weightShape[0] * weightShape[1],
61  weightShape[2],
62  weightShape[3] });
63  break;
64  case DataLayout::NCHW:
65  default:
66  // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W, ]
67  weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
68  break;
69  }
70 }

References TensorInfo::GetShape(), NCHW, NHWC, and TensorInfo::SetShape().

Referenced by ConvertWeightTensorFromArmnnToAcl(), and ConvertWeightTensorInfoFromArmnnToAcl().
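
The effect of the NCHW branch is to fold the depthwise multiplier into the channel dimension. A minimal sketch of the same reshape using the public TensorInfo API with hypothetical shape values (calling ReshapeWeightsForAcl() directly would require the internal WorkloadUtils.hpp header):

#include <armnn/Tensor.hpp>
#include <iostream>

int main()
{
    // Hypothetical NCHW depthwise weights: M = 4, I = 3, H = W = 5.
    armnn::TensorInfo weightInfo(armnn::TensorShape({ 4, 3, 5, 5 }), armnn::DataType::Float32);

    // Same reshape as the NCHW branch above: [ M, I, H, W ] -> [ 1, I * M, H, W ].
    const armnn::TensorShape& s = weightInfo.GetShape();
    weightInfo.SetShape({ 1, s[0] * s[1], s[2], s[3] });

    // Prints: 1 12 5 5
    for (unsigned int i = 0; i < weightInfo.GetNumDimensions(); ++i)
    {
        std::cout << weightInfo.GetShape()[i] << " ";
    }
    std::cout << std::endl;
    return 0;
}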

◆ Resize()

void Resize ( Decoder< float > &  in,
const TensorInfo &  inputInfo,
Encoder< float > &  out,
const TensorInfo &  outputInfo,
DataLayoutIndexed  dataLayout,
ResizeMethod  resizeMethod,
bool  alignCorners,
bool  halfPixelCenters 
)

Definition at line 65 of file Resize.cpp.

73 {
74  // alignCorners and halfPixelCenters cannot both be true
75  ARMNN_ASSERT(!(alignCorners && halfPixelCenters));
76 
77  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
78  // image is projected into the input image to figure out the interpolants and weights. Note that this
79  // will yield different results than if projecting the centre of output texels.
80 
81  const unsigned int batchSize = inputInfo.GetShape()[0];
82  const unsigned int channelCount = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
83 
84  const unsigned int inputHeight = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
85  const unsigned int inputWidth = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
86  const unsigned int outputHeight = outputInfo.GetShape()[dataLayout.GetHeightIndex()];
87  const unsigned int outputWidth = outputInfo.GetShape()[dataLayout.GetWidthIndex()];
88 
89  // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
90  // in the input image.
91  const float scaleY = CalculateResizeScale(inputHeight, outputHeight, alignCorners);
92  const float scaleX = CalculateResizeScale(inputWidth, outputWidth, alignCorners);
93 
94  const TensorShape& inputShape = inputInfo.GetShape();
95  const TensorShape& outputShape = outputInfo.GetShape();
96 
97  for (unsigned int n = 0; n < batchSize; ++n)
98  {
99  for (unsigned int c = 0; c < channelCount; ++c)
100  {
101  for (unsigned int y = 0; y < outputHeight; ++y)
102  {
103  // Corresponding real-valued height coordinate in input image.
104  float iy = PixelScaler(y, scaleY, halfPixelCenters, resizeMethod);
105 
106  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
107  const float fiy = (resizeMethod == ResizeMethod::NearestNeighbor && alignCorners) ? armnn::roundf(iy)
108  : floorf(iy);
109  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
110  const unsigned int y0 = static_cast<unsigned int>(std::max(fiy, 0.0f));
111 
112  // Interpolation weight (range [0,1]).
113  const float yw = iy - fiy;
114 
115  for (unsigned int x = 0; x < outputWidth; ++x)
116  {
117  // Real-valued and discrete width coordinates in input image.
118  float ix = PixelScaler(x, scaleX, halfPixelCenters, resizeMethod);
119 
120  // Nearest Neighbour uses rounding to align to corners
121  const float fix = resizeMethod == ResizeMethod::NearestNeighbor && alignCorners ? armnn::roundf(ix)
122  : floorf(ix);
123  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
124  const unsigned int x0 = static_cast<unsigned int>(std::max(fix, 0.0f));
125 
126  // Interpolation weight (range [0,1]).
127  const float xw = ix - fix;
128 
129  unsigned int x1;
130  unsigned int y1;
131  // Half Pixel Centers uses the scaling to compute a weighted parameter for nearby pixels
132  if (halfPixelCenters)
133  {
134  x1 = std::min(static_cast<unsigned int>(std::ceil(ix)), inputWidth - 1u);
135  y1 = std::min(static_cast<unsigned int>(std::ceil(iy)), inputHeight - 1u);
136  }
137  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
138  else
139  {
140  x1 = std::min(x0 + 1, inputWidth - 1u);
141  y1 = std::min(y0 + 1, inputHeight - 1u);
142  }
143 
144  float interpolatedValue;
145  switch (resizeMethod)
146  {
147  case ResizeMethod::Bilinear:
148  {
149  in[dataLayout.GetIndex(inputShape, n, c, y0, x0)];
150  float input1 = in.Get();
151  in[dataLayout.GetIndex(inputShape, n, c, y0, x1)];
152  float input2 = in.Get();
153  in[dataLayout.GetIndex(inputShape, n, c, y1, x0)];
154  float input3 = in.Get();
155  in[dataLayout.GetIndex(inputShape, n, c, y1, x1)];
156  float input4 = in.Get();
157 
158  const float ly0 = Lerp(input1, input2, xw); // lerp along row y0.
159  const float ly1 = Lerp(input3, input4, xw); // lerp along row y1.
160  interpolatedValue = Lerp(ly0, ly1, yw);
161  break;
162  }
163  case ResizeMethod::NearestNeighbor:
164  {
165  // calculate euclidean distance to the 4 neighbours
166  auto distance00 = EuclideanDistance(fix, fiy, x0, y0);
167  auto distance01 = EuclideanDistance(fix, fiy, x0, y1);
168  auto distance10 = EuclideanDistance(fix, fiy, x1, y0);
169  auto distance11 = EuclideanDistance(fix, fiy, x1, y1);
170 
171  auto minimum = std::min( { distance00, distance01, distance10, distance11 } );
172 
173  unsigned int xNearest = 0;
174  unsigned int yNearest = 0;
175 
176  if (minimum == distance00)
177  {
178  xNearest = x0;
179  yNearest = y0;
180  }
181  else if (minimum == distance01)
182  {
183  xNearest = x0;
184  yNearest = y1;
185  }
186  else if (minimum == distance10)
187  {
188  xNearest = x1;
189  yNearest = y0;
190  }
191  else if (minimum == distance11)
192  {
193  xNearest = x1;
194  yNearest = y1;
195  }
196  else
197  {
198  throw InvalidArgumentException("Resize Nearest Neighbor failure");
199  }
200 
201  in[dataLayout.GetIndex(inputShape, n, c, yNearest, xNearest)];
202  interpolatedValue = in.Get();
203  break;
204  }
205  default:
206  throw InvalidArgumentException("Unknown resize method: " +
207  std::to_string(static_cast<int>(resizeMethod)));
208  }
209  out[dataLayout.GetIndex(outputShape, n, c, y, x)];
210  out.Set(interpolatedValue);
211  }
212  }
213  }
214  }
215 }

References ARMNN_ASSERT, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Resize(), roundf(), and Encoder< IType >::Set().

Referenced by Resize(), and ResizeLayer::ResizeLayer().
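
Because the workload reads and writes through Decoder/Encoder, the simplest way to see the coordinate projection is a standalone restatement of the bilinear path with alignCorners and halfPixelCenters both false (hypothetical 2x2 -> 4x4 upscale; the helpers CalculateResizeScale(), PixelScaler() and Lerp() are internal to Resize.cpp and are re-stated inline here):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone bilinear resize of a single-channel image, mirroring the
// top-left-corner projection used above (alignCorners = false, halfPixelCenters = false).
int main()
{
    const unsigned int inH = 2, inW = 2, outH = 4, outW = 4;
    const std::vector<float> input = { 0.f, 1.f,
                                       2.f, 3.f };
    std::vector<float> output(outH * outW);

    const float scaleY = static_cast<float>(inH) / static_cast<float>(outH);
    const float scaleX = static_cast<float>(inW) / static_cast<float>(outW);

    for (unsigned int y = 0; y < outH; ++y)
    {
        const float iy = static_cast<float>(y) * scaleY;            // project output row into input space
        const unsigned int y0 = static_cast<unsigned int>(std::floor(iy));
        const unsigned int y1 = std::min(y0 + 1, inH - 1u);
        const float yw = iy - std::floor(iy);                       // interpolation weight along y

        for (unsigned int x = 0; x < outW; ++x)
        {
            const float ix = static_cast<float>(x) * scaleX;
            const unsigned int x0 = static_cast<unsigned int>(std::floor(ix));
            const unsigned int x1 = std::min(x0 + 1, inW - 1u);
            const float xw = ix - std::floor(ix);

            const float top    = input[y0 * inW + x0] * (1.f - xw) + input[y0 * inW + x1] * xw;
            const float bottom = input[y1 * inW + x0] * (1.f - xw) + input[y1 * inW + x1] * xw;
            output[y * outW + x] = top * (1.f - yw) + bottom * yw;
        }
    }

    // Prints: output[0]=0.00 output[5]=1.50
    std::printf("output[0]=%.2f output[5]=%.2f\n", output[0], output[5]);
    return 0;
}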

◆ ReturnWithError()

OptimizationResult armnn::ReturnWithError ( OptimizationResult  res,
const Layer *  layer,
const BackendSettings &  backendSettings,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 780 of file Network.cpp.

784 {
785  std::stringstream failureMsg;
786  failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
787  << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
788  ReportError(failureMsg.str(), errMessages);
789 
790  res.m_Error = true;
791  return res;
792 }

References GetLayerTypeAsCString(), Layer::GetType(), OptimizationResult::m_Error, BackendSettings::m_PreferredBackends, and ReportError().

Referenced by AssignBackendsIConnectable(), and AttemptBackendAssignment().

◆ ReverseGetFlatIdx()

unsigned int armnn::ReverseGetFlatIdx ( const std::vector< unsigned int > &  idxList,
unsigned int  inputRank,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 34 of file ReverseV2Impl.cpp.

37 {
38  unsigned int idx = 0;
39 
40  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
41  {
42  idx += idxList[iDim] * elementNumInner[iDim];
43  }
44 
45  return idx;
46 }

Referenced by ReverseRelocateIdx().

◆ ReverseGetMultIdx()

std::vector<unsigned int> armnn::ReverseGetMultIdx ( const unsigned int  idx,
unsigned int  inputRank,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 16 of file ReverseV2Impl.cpp.

19 {
20  std::vector<unsigned int> indexList(inputRank);
21 
22  unsigned int mIdx = idx;
23 
24  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
25  {
26  indexList[iDim] = static_cast<unsigned int>(mIdx / elementNumInner[iDim]);
27  mIdx %= elementNumInner[iDim];
28  }
29 
30  return indexList;
31 }

Referenced by ReverseRelocateIdx().

◆ ReverseRelocateIdx()

unsigned int armnn::ReverseRelocateIdx ( unsigned int  idx,
unsigned int  inputRank,
std::vector< bool > &  axisFlag,
std::vector< unsigned int > &  dimSize,
std::vector< unsigned int > &  elementNumInner 
)

Definition at line 49 of file ReverseV2Impl.cpp.

54 {
55  // Get the multidimensional index list for input
56  auto inputIdxList = ReverseGetMultIdx(idx, inputRank, elementNumInner);
57 
58  std::vector<unsigned int> outputIdxList(inputRank);
59 
60  // Relocate the input index to the output one
61  for (unsigned int iDim = 0; iDim < inputRank; ++iDim)
62  {
63  if (axisFlag[iDim])
64  {
65  outputIdxList[iDim] = dimSize[iDim] - inputIdxList[iDim] - 1;
66  }
67  else
68  {
69  outputIdxList[iDim] = inputIdxList[iDim];
70  }
71  }
72 
73  // Get the 1-dimensional flattened index for output
74  unsigned int outputIdx = ReverseGetFlatIdx(outputIdxList, inputRank, elementNumInner);
75  return outputIdx;
76 }

References ReverseGetFlatIdx(), and ReverseGetMultIdx().

Referenced by ReverseV2().
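
The three helpers convert a flat input index to a multidimensional one, flip the coordinates of every reversed axis, and flatten again. A minimal standalone restatement for a hypothetical 2x3 tensor reversed along axis 1 (the helpers live in ReverseV2Impl.cpp, so their logic is repeated inline here):

#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical 2x3 tensor reversed along axis 1, using the same scheme as the helpers above.
    const std::vector<unsigned int> dimSize         = { 2, 3 };
    const std::vector<bool>         axisFlag        = { false, true };
    const std::vector<unsigned int> elementNumInner = { 3, 1 };   // elements per step of each dimension

    for (unsigned int idx = 0; idx < 6; ++idx)
    {
        // Flat index -> multidimensional index (ReverseGetMultIdx)
        std::vector<unsigned int> in(2);
        unsigned int m = idx;
        for (unsigned int d = 0; d < 2; ++d) { in[d] = m / elementNumInner[d]; m %= elementNumInner[d]; }

        // Flip the coordinates of the reversed axes (ReverseRelocateIdx)
        std::vector<unsigned int> out(2);
        for (unsigned int d = 0; d < 2; ++d)
        {
            out[d] = axisFlag[d] ? dimSize[d] - in[d] - 1 : in[d];
        }

        // Multidimensional index -> flat index (ReverseGetFlatIdx)
        const unsigned int outIdx = out[0] * elementNumInner[0] + out[1] * elementNumInner[1];
        std::printf("%u -> %u\n", idx, outIdx);   // 0->2, 1->1, 2->0, 3->5, 4->4, 5->3
    }
    return 0;
}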

◆ ReverseV2()

void ReverseV2 ( const TensorInfo &  inputInfo,
const TensorInfo &  axisInfo,
Decoder< float > &  inputDecoder,
Decoder< int > &  axisDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 78 of file ReverseV2Impl.cpp.

83 {
84  unsigned int axesRank = static_cast<unsigned int>(axisInfo.GetNumElements());
85 
86  // Empty axis and empty tensor case: copy input to output
87  if ((axesRank == 0) || inputInfo.GetNumElements() == 0)
88  {
89  for (unsigned idx = 0; idx < inputInfo.GetNumElements(); idx++)
90  {
91  float inputValue = inputDecoder.Get();
92  inputDecoder += 1;
93  outputEncoder.Set(inputValue);
94  outputEncoder += 1;
95  }
96  return;
97  }
98 
99  unsigned int inputRank = static_cast<unsigned int>(inputInfo.GetNumDimensions());
100 
101  std::vector<bool> axisFlag(inputRank, false);
102  std::vector<unsigned int> dimSize(inputRank, 0);
103  std::vector<int32_t> axis(axesRank, 0);
104 
105  // Decode the axis information
106  for (unsigned int i=0; i < axesRank; i++)
107  {
108  axis[i] = axisDecoder.Get();
109  axisDecoder += 1;
110  }
111 
112  // Make sure the axes are positive
113  for (int32_t axisElement: axis)
114  {
115  axisElement = axisElement < 0 ? axisElement + static_cast<int32_t>(inputRank) : axisElement;
116  axisFlag[static_cast<uint32_t>(axisElement)] = true;
117  }
118 
119  const TensorShape &inputShape = inputInfo.GetShape();
120 
121  unsigned int elementNum = inputInfo.GetNumElements();
122  unsigned int baseDimSize = 1;
123 
124  std::vector<unsigned int> elementNumInner;
125 
126  // Get the number of element within the specific dimension
127  for (unsigned int iDim = 0; iDim < inputRank; ++iDim) {
128  dimSize[iDim] = inputShape[iDim];
129  baseDimSize *= dimSize[iDim];
130  elementNumInner.push_back(static_cast<unsigned int>(elementNum / baseDimSize));
131  }
132 
133  // Iterate through all elements
134  for (unsigned int idx = 0; idx < elementNum; ++idx)
135  {
136  float inputValue = inputDecoder.Get();
137  inputDecoder += 1;
138  auto outputIdx = ReverseRelocateIdx(idx, inputRank, axisFlag, dimSize, elementNumInner);
139  outputEncoder[outputIdx];
140  outputEncoder.Set(inputValue);
141  }
142 }

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), ReverseRelocateIdx(), and Encoder< IType >::Set().

◆ RevertConstantWeightsToFP32()

bool armnn::RevertConstantWeightsToFP32 ( Layer *  layer)

◆ roundf()

float armnn::roundf ( float  value)
inline

Definition at line 43 of file Utils.hpp.

44 {
45  // Workaround Valgrind's mismatches: when running from Valgrind the call to std::round(4.5) == 4.0 instead of 5.0
46  return (value < 0.f) ? ::floorf(value - 0.5f) : ::floorf(value + 0.5f);
47 }

Referenced by Resize().

◆ RunClFunction()

void RunClFunction ( arm_compute::IFunction &  function,
const CheckLocation &  location 
)
inline

Definition at line 168 of file ClWorkloadUtils.hpp.

169 {
170  try
171  {
172  function.run();
173  }
174  catch (cl::Error& error)
175  {
176  throw WrapClError(error, location);
177  }
178 }

References error, and WrapClError().

Referenced by ClFillWorkload::Execute(), ClPadWorkload::Execute(), ClAdditionWorkload::Execute(), ClSubtractionWorkload::Execute(), ClActivationWorkload::Execute(), ClNegWorkload::Execute(), ClCastWorkload::Execute(), ClExpWorkload::Execute(), ClPreluWorkload::Execute(), ClConvertFp16ToFp32Workload::Execute(), ClQuantizeWorkload::Execute(), ClRsqrtWorkload::Execute(), ClSinWorkload::Execute(), ClAbsWorkload::Execute(), ClConvertFp32ToFp16Workload::Execute(), ClSqrtWorkload::Execute(), ClLogWorkload::Execute(), ClLstmFloatWorkload::Execute(), ClNormalizationFloatWorkload::Execute(), ClSpaceToDepthWorkload::Execute(), ClFloorFloatWorkload::Execute(), ClReshapeWorkload::Execute(), ClResizeWorkload::Execute(), ClGatherWorkload::Execute(), ClInstanceNormalizationWorkload::Execute(), ClMaximumWorkload::Execute(), ClMinimumWorkload::Execute(), ClArgMinMaxWorkload::Execute(), ClChannelShuffleWorkload::Execute(), ClComparisonWorkload::Execute(), ClBatchMatMulWorkload::Execute(), ClSliceWorkload::Execute(), ClL2NormalizationFloatWorkload::Execute(), ClSpaceToBatchNdWorkload::Execute(), ClDepthToSpaceWorkload::Execute(), ClStridedSliceWorkload::Execute(), ClDivisionWorkload::Execute(), ClMultiplicationWorkload::Execute(), ClPooling2dWorkload::Execute(), ClGatherNdWorkload::Execute(), ClPooling3dWorkload::Execute(), ClBatchToSpaceNdWorkload::Execute(), ClPermuteWorkload::Execute(), ClTransposeWorkload::Execute(), ClLogSoftmaxWorkload::Execute(), ClQuantizedLstmWorkload::Execute(), ClSoftmaxWorkload::Execute(), ClDepthwiseConvolutionWorkload::Execute(), ClBatchNormalizationFloatWorkload::Execute(), ClFullyConnectedWorkload::Execute(), ClConvolution3dWorkload::Execute(), ClTransposeConvolution2dWorkload::Execute(), and ClConvolution2dWorkload::Execute().

◆ SelectTensorHandleStrategy()

OptimizationResult SelectTensorHandleStrategy ( Graph &  optGraph,
BackendsMap &  backends,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled,
bool  exportEnabled,
Optional< std::vector< std::string > & >  errMessages 
)

Definition at line 1812 of file Network.cpp.

1818 {
1819  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1820  OptimizationResult result;
1821 
1822  optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
1823  {
1824  ARMNN_ASSERT(layer);
1825 
1826  // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
1827  // assignment if this check fails
1828  ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
1829 
1830  // Check each output separately
1831  for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1832  {
1833  OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1834 
1835  ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
1836 
1837  // Calculate the factory to use which results in the fewest copies being made.
1838  switch(layer->GetType())
1839  {
1840  case LayerType::Input:
1841  slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1842  break;
1843  case LayerType::Output:
1844  slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1845  break;
1846  default:
1847  slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
1848  break;
1849  }
1850  outputSlot.SetTensorHandleFactory(slotOption);
1851 
1852  // Now determine the "best" edge strategy for each connection given the slotOption.
1853  unsigned int connectionIdx = 0;
1854  for (auto&& connection : outputSlot.GetConnections())
1855  {
1856  const Layer& connectedLayer = connection->GetOwningLayer();
1857 
1858  EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1859  registry, importEnabled);
1860 
1861  if (strategy == EdgeStrategy::Undefined)
1862  {
1863  result.m_Error = true;
1864  if (errMessages)
1865  {
1866  errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1867  " between backends.");
1868  }
1869  return;
1870  }
1871 
1872  outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1873 
1874  connectionIdx++;
1875  }
1876  }
1877  });
1878 
1879  return result;
1880 }

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, CalculateEdgeStrategy(), CalculateSlotOption(), CalculateSlotOptionForInput(), CalculateSlotOptionForOutput(), Graph::ForEachLayer(), Layer::GetBackendId(), OutputSlot::GetConnections(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, OptimizationResult::m_Error, Output, OutputSlot::SetEdgeStrategy(), OutputSlot::SetTensorHandleFactory(), and Undefined.

Referenced by Optimize().

◆ SetAllLoggingSinks()

void SetAllLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)

Definition at line 191 of file Logging.cpp.

192 {
193  SetLoggingSinks<LogSeverity::Trace>(standardOut, debugOut, coloured);
194  SetLoggingSinks<LogSeverity::Debug>(standardOut, debugOut, coloured);
195  SetLoggingSinks<LogSeverity::Info>(standardOut, debugOut, coloured);
196  SetLoggingSinks<LogSeverity::Warning>(standardOut, debugOut, coloured);
197  SetLoggingSinks<LogSeverity::Error>(standardOut, debugOut, coloured);
198  SetLoggingSinks<LogSeverity::Fatal>(standardOut, debugOut, coloured);
199 }

Referenced by ConfigureLogging(), and TEST_SUITE().

◆ SetClSliceData()

auto SetClSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 100 of file ClWorkloadUtils.hpp.

102 {
103  // This function must translate the size vector given to an end vector
104  // expected by the ACL NESlice workload
105  arm_compute::Coordinates starts;
106  arm_compute::Coordinates ends;
107 
108  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
109 
110  // For strided slices, we have the relationship size = (end - begin) / stride
111  // For slice, we assume stride to be a vector of all ones, yielding the formula
112  // size = (end - begin) therefore we know end = size + begin
113  for (unsigned int i = 0; i < num_dims; i++)
114  {
115  unsigned int revertedIndex = num_dims - i - 1;
116 
117  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
118  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
119  }
120 
121  return std::make_tuple(starts, ends);
122 }

Referenced by ClSliceWorkload::ClSliceWorkload().

◆ SetClStridedSliceData()

auto SetClStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 79 of file ClWorkloadUtils.hpp.

82 {
83  arm_compute::Coordinates starts;
84  arm_compute::Coordinates ends;
85  arm_compute::Coordinates strides;
86 
87  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
88 
89  for (unsigned int i = 0; i < num_dims; i++) {
90  unsigned int revertedIndex = num_dims - i - 1;
91 
92  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
93  ends.set(i, static_cast<int>(m_end[revertedIndex]));
94  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
95  }
96 
97  return std::make_tuple(starts, ends, strides);
98 }

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload().

◆ SetLogFilter()

void SetLogFilter ( LogSeverity  level)

Definition at line 73 of file Logging.cpp.

74 {
75  SimpleLogger<LogSeverity::Trace>::Get().Enable(false);
76  SimpleLogger<LogSeverity::Debug>::Get().Enable(false);
77  SimpleLogger<LogSeverity::Info>::Get().Enable(false);
78  SimpleLogger<LogSeverity::Warning>::Get().Enable(false);
79  SimpleLogger<LogSeverity::Error>::Get().Enable(false);
80  SimpleLogger<LogSeverity::Fatal>::Get().Enable(false);
81  switch (level)
82  {
83  case LogSeverity::Trace:
84  SimpleLogger<LogSeverity::Trace>::Get().Enable(true);
85  ARMNN_FALLTHROUGH;
86  case LogSeverity::Debug:
87  SimpleLogger<LogSeverity::Debug>::Get().Enable(true);
88  ARMNN_FALLTHROUGH;
89  case LogSeverity::Info:
90  SimpleLogger<LogSeverity::Info>::Get().Enable(true);
91  ARMNN_FALLTHROUGH;
92  case LogSeverity::Warning:
93  SimpleLogger<LogSeverity::Warning>::Get().Enable(true);
94  ARMNN_FALLTHROUGH;
95  case LogSeverity::Error:
96  SimpleLogger<LogSeverity::Error>::Get().Enable(true);
97  ARMNN_FALLTHROUGH;
98  case LogSeverity::Fatal:
99  SimpleLogger<LogSeverity::Fatal>::Get().Enable(true);
100  break;
101  default:
102  ARMNN_ASSERT(false);
103  }
104 }

References ARMNN_ASSERT, ARMNN_FALLTHROUGH, Debug, SimpleLogger< Level >::Enable(), Error, Fatal, SimpleLogger< Level >::Get(), Info, Trace, and Warning.

Referenced by ConfigureLogging(), and TEST_SUITE().
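
A minimal usage sketch combining SetAllLoggingSinks() and SetLogFilter(), assuming both helpers are visible through armnn/Logging.hpp (ConfigureLogging() is the usual public entry point that calls them):

#include <armnn/Logging.hpp>

int main()
{
    // Route all severities to stdout (coloured, no debug sink), then enable Info and above.
    armnn::SetAllLoggingSinks(true, false, true);
    armnn::SetLogFilter(armnn::LogSeverity::Info);

    ARMNN_LOG(info)  << "visible message";
    ARMNN_LOG(trace) << "filtered out by the Info level";
    return 0;
}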

◆ SetLoggingSinks()

void armnn::SetLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)
inline

Definition at line 167 of file Logging.cpp.

168 {
169  SimpleLogger<Level>::Get().RemoveAllSinks();
170 
171  if (standardOut)
172  {
173  if (coloured)
174  {
175  SimpleLogger<Level>::Get().AddSink(
176  std::make_shared<StandardOutputColourSink>(Level));
177  } else
178  {
179  SimpleLogger<Level>::Get().AddSink(
180  std::make_shared<StandardOutputSink>());
181  }
182  }
183 
184  if (debugOut)
185  {
186  SimpleLogger<Level>::Get().AddSink(
187  std::make_shared<DebugOutputSink>());
188  }
189 }

References SimpleLogger< Level >::AddSink(), SimpleLogger< Level >::Get(), and SimpleLogger< Level >::RemoveAllSinks().

◆ SetNeonSliceData()

auto armnn::SetNeonSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 161 of file NeonWorkloadUtils.hpp.

163 {
164  // This function must translate the size vector given to an end vector
165  // expected by the ACL NESlice workload
166  arm_compute::Coordinates starts;
167  arm_compute::Coordinates ends;
168 
169  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
170 
171  // For strided slices, we have the relationship size = (end - begin) / stride
172  // For slice, we assume stride to be a vector of all ones, yielding the formula
173  // size = (end - begin) therefore we know end = size + begin
174  for (unsigned int i = 0; i < num_dims; i++)
175  {
176  unsigned int revertedIndex = num_dims - i - 1;
177 
178  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
179  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
180  }
181 
182  return std::make_tuple(starts, ends);
183 }

Referenced by NeonSliceWorkload::NeonSliceWorkload().

◆ SetNeonStridedSliceData()

auto armnn::SetNeonStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 139 of file NeonWorkloadUtils.hpp.

142 {
143  arm_compute::Coordinates starts;
144  arm_compute::Coordinates ends;
145  arm_compute::Coordinates strides;
146 
147  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
148 
149  for (unsigned int i = 0; i < num_dims; i++)
150  {
151  unsigned int revertedIndex = num_dims - i - 1;
152 
153  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
154  ends.set(i, static_cast<int>(m_end[revertedIndex]));
155  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
156  }
157 
158  return std::make_tuple(starts, ends, strides);
159 }

Referenced by NeonStridedSliceWorkload::NeonStridedSliceWorkload().

◆ SetValueChecked()

◆ Slice()

void Slice ( const TensorInfo &  inputInfo,
const SliceDescriptor &  descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 14 of file Slice.cpp.

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const unsigned int numDims = inputShape.GetNumDimensions();
22 
23  constexpr unsigned int maxNumDims = 4;
24  if (descriptor.m_Begin.size() != numDims)
25  {
26  std::stringstream msg;
27  msg << "Slice: Number of dimensions (" << numDims <<
28  ") does not match the Begin vector in the descriptor (" << descriptor.m_Begin.size() << ")";
29  throw InvalidArgumentException(msg.str());
30  }
31  if (descriptor.m_Size.size() != numDims)
32  {
33  std::stringstream msg;
34  msg << "Slice: Number of dimensions (" << numDims <<
35  ") does not match the Size vector in the descriptor (" << descriptor.m_Size.size() << ")";
36  throw InvalidArgumentException(msg.str());
37  }
38  if (numDims > maxNumDims)
39  {
40  std::stringstream msg;
41  msg << "Slice: Number of dimensions (" << numDims <<
42  ") is greater than the maximum supported (" << maxNumDims << ")";
43  throw InvalidArgumentException(msg.str());
44  }
45 
46  std::vector<unsigned int> paddedInput(4);
47  std::vector<unsigned int> paddedBegin(4);
48  std::vector<unsigned int> paddedSize (4);
49 
50  const unsigned int numPaddingDims = maxNumDims - numDims;
51  for (unsigned int i = 0u; i < maxNumDims; ++i)
52  {
53  if (i < numPaddingDims)
54  {
55  paddedInput[i] = 1u;
56  paddedBegin[i] = 0u;
57  paddedSize[i] = 1u;
58  }
59  else
60  {
61  const unsigned int j = i - numPaddingDims;
62  paddedInput[i] = inputShape[j];
63  paddedBegin[i] = descriptor.m_Begin[j];
64  paddedSize[i] = descriptor.m_Size[j];
65  }
66  }
67 
68  unsigned int dim0 = paddedInput[0];
69  unsigned int dim1 = paddedInput[1];
70  unsigned int dim2 = paddedInput[2];
71  unsigned int dim3 = paddedInput[3];
72 
73  unsigned int begin0 = paddedBegin[0];
74  unsigned int begin1 = paddedBegin[1];
75  unsigned int begin2 = paddedBegin[2];
76  unsigned int begin3 = paddedBegin[3];
77 
78  unsigned int size0 = paddedSize[0];
79  unsigned int size1 = paddedSize[1];
80  unsigned int size2 = paddedSize[2];
81  unsigned int size3 = paddedSize[3];
82 
83  if (begin0 + size0 > dim0)
84  {
85  std::stringstream msg;
86  msg << "Slice: begin0 + size0 (" << (begin0 + size0) <<
87  ") exceeds dim0 (" << dim0 << ")";
88  throw InvalidArgumentException(msg.str());
89  }
90  if (begin1 + size1 > dim1)
91  {
92  std::stringstream msg;
93  msg << "Slice: begin1 + size1 (" << (begin1 + size1) <<
94  ") exceeds dim1 (" << dim1 << ")";
95  throw InvalidArgumentException(msg.str());
96  }
97  if (begin2 + size2 > dim2)
98  {
99  std::stringstream msg;
100  msg << "Slice: begin2 + size2 (" << (begin2 + size2) <<
101  ") exceeds dim2 (" << dim2 << ")";
102  throw InvalidArgumentException(msg.str());
103  }
104  if (begin3 + size3 > dim3)
105  {
106  std::stringstream msg;
107  msg << "Slice: begin3 + size3 (" << (begin3 + size3) <<
108  ") exceeds dim3 (" << dim3 << ")";
109  throw InvalidArgumentException(msg.str());
110  }
111 
112  if (inputData == nullptr)
113  {
114  throw armnn::NullPointerException("Slice: Null inputData pointer");
115  }
116  if (outputData == nullptr)
117  {
118  throw armnn::NullPointerException("Slice: Null outputData pointer");
119  }
120 
121  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
122  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
123 
124  for (unsigned int idx0 = begin0; idx0 < begin0 + size0; ++idx0)
125  {
126  for (unsigned int idx1 = begin1; idx1 < begin1 + size1; ++idx1)
127  {
128  for (unsigned int idx2 = begin2; idx2 < begin2 + size2; ++idx2)
129  {
130  for (unsigned int idx3 = begin3; idx3 < begin3 + size3; ++idx3)
131  {
132  const unsigned int inputOffset =
133  (((idx0 * dim1 + idx1) * dim2 + idx2) * dim3 + idx3) * dataTypeSize;
134 
135  ::memcpy(output, input + inputOffset, dataTypeSize);
136  output += dataTypeSize;
137  }
138  }
139  }
140  }
141 }

References TensorShape::GetNumDimensions(), TensorInfo::GetShape(), SliceDescriptor::m_Begin, and SliceDescriptor::m_Size.
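
A minimal usage sketch with a hypothetical 1x1x3x4 input, assuming the reference workload header (src/backends/reference/workloads/Slice.hpp) is reachable on the include path:

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <vector>
#include "Slice.hpp"   // reference workload header (assumed include path)

int main()
{
    // Hypothetical 1x1x3x4 float input; take a 1x1x2x2 window starting at (0,0,1,1).
    armnn::TensorInfo inputInfo(armnn::TensorShape({ 1, 1, 3, 4 }), armnn::DataType::Float32);

    std::vector<float> input(inputInfo.GetNumElements());
    for (unsigned int i = 0; i < input.size(); ++i) { input[i] = static_cast<float>(i); }

    armnn::SliceDescriptor descriptor;
    descriptor.m_Begin = { 0, 0, 1, 1 };
    descriptor.m_Size  = { 1, 1, 2, 2 };

    std::vector<float> output(2 * 2);
    armnn::Slice(inputInfo, descriptor, input.data(), output.data(), sizeof(float));
    // output now holds { 5, 6, 9, 10 }
    return 0;
}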

◆ Softmax()

void Softmax ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo &  inputTensorInfo,
float  beta,
int  axis 
)

Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.

Definition at line 17 of file Softmax.cpp.

18 {
19  ARMNN_ASSERT_MSG(axis < static_cast<int>(inputTensorInfo.GetNumDimensions()),
20  "Required axis index greater than number of dimensions.");
21  ARMNN_ASSERT_MSG(axis >= -static_cast<int>(inputTensorInfo.GetNumDimensions()),
22  "Required axis index lower than negative of the number of dimensions");
23 
24  unsigned int uAxis = axis < 0 ?
25  inputTensorInfo.GetNumDimensions() - static_cast<unsigned int>(abs(axis))
26  : static_cast<unsigned int>(axis);
27 
28  const TensorShape& inputShape = inputTensorInfo.GetShape();
29  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
30  const unsigned int axisSize = inputShape[uAxis];
31  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
32  uAxis + 1,
33  inputShape.GetNumDimensions());
34 
35  for (unsigned int outer = 0; outer < outerSize; ++outer)
36  {
37  unsigned int inputBeginIdx = outer * axisSize * innerSize;
38  unsigned int inputEndIdx = inputBeginIdx + axisSize * innerSize;
39  unsigned int outputBeginIdx = outer * axisSize * innerSize;
40 
41  for (unsigned int inner = 0; inner < innerSize; ++inner, ++inputBeginIdx, ++inputEndIdx, ++outputBeginIdx)
42  {
43  // Find max
44  float maxValue = std::numeric_limits<float>::lowest();
45  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
46  {
47  in[iter];
48  maxValue = std::max(maxValue, in.Get());
49  }
50 
51  // Compute sum
52  float sum = 0.0f;
53  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
54  {
55  in[iter];
56  sum += std::exp((in.Get() - maxValue) * beta);
57  }
58 
59  // Compute result
60  unsigned int outputIter = outputBeginIdx;
61  out[outputIter];
62  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize, outputIter += innerSize)
63  {
64  out[outputIter];
65  in[iter];
66  out.Set(std::exp((in.Get() - maxValue) * beta) / sum);
67  }
68  }
69  }
70 }

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), and Encoder< IType >::Set().
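
The Decoder/Encoder plumbing aside, the computation is the usual numerically stable softmax: subtract the per-slice maximum, scale by beta, exponentiate and normalise. A standalone restatement on a plain vector (not the workload itself):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone restatement of the max-subtraction trick used above, on a plain vector.
std::vector<float> SoftmaxSketch(const std::vector<float>& in, float beta)
{
    const float maxValue = *std::max_element(in.begin(), in.end());

    float sum = 0.0f;
    std::vector<float> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i)
    {
        out[i] = std::exp((in[i] - maxValue) * beta);   // subtracting the max avoids overflow
        sum += out[i];
    }
    for (float& v : out) { v /= sum; }
    return out;
}

int main()
{
    const std::vector<float> result = SoftmaxSketch({ 1.0f, 2.0f, 3.0f }, 1.0f);
    std::printf("%.3f %.3f %.3f\n", result[0], result[1], result[2]);   // ~0.090 0.245 0.665
    return 0;
}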

◆ SpaceToBatchNd()

void SpaceToBatchNd ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const SpaceToBatchNdDescriptor &  params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 48 of file SpaceToBatchNd.cpp.

53 {
54  unsigned int rank = inputInfo.GetNumDimensions();
55  if (rank != 3 && rank != 4 )
56  {
57  throw InvalidArgumentException("Tensor rank must be either 3 or 4, but it is " + std::to_string(rank),
58  CHECK_LOCATION());
59  }
60 
61  DataLayoutIndexed dataLayout = params.m_DataLayout;
62  unsigned int channelDimension3D = params.m_DataLayout == DataLayout::NCHW ? 1 : 2;
63 
64  const TensorShape& inputShape = inputInfo.GetShape();
65  const TensorShape& outputShape = outputInfo.GetShape();
66 
67  const unsigned int inputBatchSize = inputShape[0];
68  const unsigned int outputBatchSize = outputShape[0];
69 
70  const unsigned int channels = (rank == 3) ? inputShape[channelDimension3D]
71  : inputShape[dataLayout.GetChannelsIndex()];
72 
73  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
74  const unsigned int inputWidth = (rank == 3) ? 1 : inputShape[dataLayout.GetWidthIndex()];
75  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
76  const unsigned int outputWidth = (rank == 3) ? 1 : outputShape[dataLayout.GetWidthIndex()];
77 
78  const unsigned int blockHeight = params.m_BlockShape[0];
79  const unsigned int blockWidth = (rank == 3) ? 1 : params.m_BlockShape[1];
80 
81  const unsigned int paddingTop = params.m_PadList[0].first;
82  const unsigned int paddingLeft = (rank == 3) ? 0 : params.m_PadList[1].first;
83 
84  for (unsigned int outB = 0; outB < outputBatchSize; ++outB)
85  {
86  unsigned int inB = outB % inputBatchSize;
87 
88  unsigned int shiftW = (outB / inputBatchSize) % blockWidth;
89  unsigned int shiftH = (outB / inputBatchSize) / blockWidth;
90 
91  for (unsigned int outH = 0; outH < outputHeight; ++outH)
92  {
93  for (unsigned int outW = 0; outW < outputWidth; ++outW)
94  {
95  if (outH * blockHeight + shiftH < paddingTop ||
96  outH * blockHeight + shiftH >= paddingTop + inputHeight ||
97  outW * blockWidth + shiftW < paddingLeft ||
98  outW * blockWidth + shiftW >= paddingLeft + inputWidth)
99  {
100  for (unsigned int c = 0; c < channels; c++)
101  {
102  unsigned int outOffset = GetOffset(outputShape,
103  outB,
104  outH,
105  outW,
106  c,
107  dataLayout);
108  outputData += outOffset;
109  outputData.Set(0);
110  outputData -= outOffset;
111  }
112  }
113  else
114  {
115  for (unsigned int c = 0; c < channels; c++)
116  {
117  unsigned int inOffset = GetOffset(inputShape,
118  inB,
119  (outH * blockHeight + shiftH) - paddingTop,
120  (outW * blockWidth + shiftW) - paddingLeft,
121  c,
122  dataLayout);
123 
124  unsigned int outOffset = GetOffset(outputShape,
125  outB,
126  outH,
127  outW,
128  c,
129  dataLayout);
130 
131  outputData += outOffset;
132  inputData += inOffset;
133  outputData.Set(inputData.Get());
134  inputData -= inOffset;
135  outputData -= outOffset;
136  }
137  }
138  }
139  }
140  }
141 }

References CHECK_LOCATION, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetNumDimensions(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToBatchNdDescriptor::m_BlockShape, SpaceToBatchNdDescriptor::m_DataLayout, SpaceToBatchNdDescriptor::m_PadList, Encoder< IType >::Set(), and SpaceToBatchNd().

Referenced by SpaceToBatchNd(), and SpaceToBatchNdLayer::SpaceToBatchNdLayer().

◆ SpaceToDepth()

void SpaceToDepth ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const SpaceToDepthDescriptor &  params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 36 of file SpaceToDepth.cpp.

41 {
42  DataLayoutIndexed dataLayout = params.m_DataLayout;
43 
44  const TensorShape& inputShape = inputInfo.GetShape();
45  const TensorShape& outputShape = outputInfo.GetShape();
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
49 
50  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
51  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
52  const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int blockSize = params.m_BlockSize;
55 
56  if (blockSize == 0)
57  {
58  throw InvalidArgumentException(
59  "Input shape must be divisible by block size in all spatial dimensions: Block size is"
60  " equal to zero");
61  }
62 
63  for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
64  {
65  unsigned int inChannelIndex = outChannelIndex % inputChannels;
66 
67  unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
68  unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
69 
70  for (unsigned int outH = 0; outH < outputHeight; outH++)
71  {
72  for (unsigned int outW = 0; outW < outputWidth; outW++)
73  {
74  for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
75  {
76  unsigned int inOffset = GetOffset(inputShape,
77  inChannelIndex,
78  (outH * blockSize + shiftH),
79  (outW * blockSize + shiftW),
80  inBatchIndex,
81  dataLayout);
82 
83  unsigned int outOffset = GetOffset(outputShape,
84  outChannelIndex,
85  outH,
86  outW,
87  inBatchIndex,
88  dataLayout);
89 
90  outputData += outOffset;
91  inputData += inOffset;
92  outputData.Set(inputData.Get());
93  inputData -= inOffset;
94  outputData -= outOffset;
95  }
96  }
97  }
98  }
99 }

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, Encoder< IType >::Set(), and SpaceToDepth().

Referenced by SpaceToDepth(), and SpaceToDepthLayer::SpaceToDepthLayer().
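
The loop implies the usual SpaceToDepth shape relationship: spatial blocks of blockSize x blockSize move into the channel dimension. A minimal sketch with hypothetical NHWC dimensions:

#include <cstdio>

int main()
{
    // Hypothetical NHWC input 1x4x4x3 with block size 2: each 2x2 spatial block moves
    // into channels, so H and W shrink by blockSize while C grows by blockSize * blockSize.
    const unsigned int N = 1, H = 4, W = 4, C = 3, blockSize = 2;

    const unsigned int outH = H / blockSize;
    const unsigned int outW = W / blockSize;
    const unsigned int outC = C * blockSize * blockSize;

    std::printf("output shape: [%u, %u, %u, %u]\n", N, outH, outW, outC);   // [1, 2, 2, 12]
    return 0;
}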

◆ Split()

void Split ( const SplitterQueueDescriptor &  data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 21 of file Splitter.cpp.

24 {
25  const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
26 
27  std::unique_ptr<Decoder<float>> decoderPtr =
28  MakeDecoder<float>(inputInfo, inputs[0]->Map());
29  Decoder<float>& decoder = *decoderPtr;
30 
31  for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
32  {
33  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
34 
35  unsigned int indexRemainder = index;
36  unsigned int dimensionStride = inputInfo.GetNumElements();
37 
38  for (unsigned int i = 0; i<inputInfo.GetNumDimensions(); i++)
39  {
40  dimensionStride /= inputInfo.GetShape()[i];
41  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
42  indexRemainder -= indices[i] * dimensionStride;
43  }
44 
45  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
46  {
47  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
48 
49  //Split view extents are defined by the size of (the corresponding) input tensor.
50  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
51  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
52 
53  // Check all dimensions to see if this element is inside the given input view.
54  bool insideView = true;
55  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
56  {
57  if (indices[i] < view.m_Origin[i])
58  {
59  insideView = false;
60  }
61  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
62  {
63  insideView = false;
64  }
65  }
66 
67  if (insideView)
68  {
69  std::unique_ptr<Encoder<float>> encoderPtr =
70  MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
71  Encoder<float>& encoder = *encoderPtr;
72 
73  unsigned int outIndex = 0;
74  unsigned int dimensionStride = 1;
75  float inputValue = 0.f;
76 
77  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
78  {
79  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
80  dimensionStride *= outputInfo.GetShape()[i];
81  }
82 
83  decoder += index;
84  inputValue = decoder.Get();
85  decoder -= index;
86 
87  encoder += outIndex;
88  encoder.Set(inputValue);
89  break;
90  }
91  }
92  }
93 }

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, Map, MaxNumOfTensorDimensions, and Encoder< IType >::Set().

◆ Splitter()

void armnn::Splitter ( const SplitterQueueDescriptor &  data,
std::vector< ITensorHandle * >  inputs,
std::vector< ITensorHandle * >  outputs 
)

Definition at line 17 of file Splitter.hpp.

20 {
21  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
22 
23  for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = inputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= inputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
43  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  unsigned int outIndex = 0;
62  unsigned int dimensionStride = 1;
63 
64  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
65  {
66  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
67  dimensionStride *= outputInfo.GetShape()[i];
68  }
69 
70  //We are within the view, to copy input data to the output corresponding to this view.
71  DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
72  ARMNN_ASSERT(outputData);
73 
74  const DataType* inputData = GetInputTensorData<DataType>(0, data);
75  ARMNN_ASSERT(inputData);
76 
77  outputData[outIndex] = inputData[index];
78  }
79  }
80  }
81 }

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

◆ Stack()

void Stack ( const StackQueueDescriptor &  data,
std::vector< std::unique_ptr< Decoder< float >>> &  inputs,
Encoder< float > &  output,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo 
)

Definition at line 12 of file Stack.cpp.

17 {
18  unsigned int outputNumDims = outputInfo.GetNumDimensions();
19  unsigned int inputNumDims = inputInfo.GetNumDimensions();
20 
21  const armnn::TensorShape& outputDims = outputInfo.GetShape();
22  const armnn::TensorShape& inputDims = inputInfo.GetShape();
23 
24  unsigned int axis = data.m_Parameters.m_Axis;
25 
26  // Can perform a simple concatenation when axis == 0
27  if (!axis)
28  {
29  unsigned int numInputs = data.m_Parameters.m_NumInputs;
30  unsigned int inputLength = inputInfo.GetNumElements();
31 
32  for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
33  {
34  for (unsigned int elmt=0; elmt<inputLength; ++elmt)
35  {
36  (*inputs[inputIdx])[elmt];
37  output[(inputIdx * inputLength) + elmt];
38  output.Set(inputs[inputIdx]->Get());
39  }
40  }
41  return;
42  }
43 
44  const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
45  const unsigned int iBatchSize = inputDims[0];
46  const unsigned int iChannels = (inputNumDims > 1) ? inputDims[1] : 1;
47  const unsigned int iHeight = (inputNumDims > 2) ? inputDims[2] : 1;
48  const unsigned int iWidth = (inputNumDims > 3) ? inputDims[3] : 1;
49 
50  const unsigned int oBatchSize = outputDims[1];
51  const unsigned int oChannels = (outputNumDims > 2) ? outputDims[2] : 1;
52  const unsigned int oHeight = (outputNumDims > 3) ? outputDims[3] : 1;
53  const unsigned int oWidth = (outputNumDims > 4) ? outputDims[4] : 1;
54 
55  // Array to store the input coordinates
56  // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
57  // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
58  // iCoordinates[5] will be always zero and used for not incrementing
59  // the output when the input has less than 4 dimensions
60  std::array<unsigned int, 6> iCoordinates{ 0 };
61 
62  // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
63  // This array is initialized with &iCoordinates[5] since this will be always zero
64  std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
65  &iCoordinates[5],
66  &iCoordinates[5],
67  &iCoordinates[5],
68  &iCoordinates[5] };
69 
70  // Set the axis coordinate
71  oCoordinates[axis] = &iCoordinates[0];
72 
73  // Map the output coordinates, accounting for the axis
74  unsigned int dim_shift = 0;
75  for(unsigned int dim = 0; dim < inputNumDims; ++dim)
76  {
77  if(dim == axis)
78  {
79  dim_shift++;
80  }
81  oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
82  }
83 
84  // Alias for the input coordinates
85  unsigned int &i = iCoordinates[0];
86  unsigned int &bi = iCoordinates[1];
87  unsigned int &ci = iCoordinates[2];
88  unsigned int &hi = iCoordinates[3];
89  unsigned int &wi = iCoordinates[4];
90 
91  // Alias for the output coordinates
92  unsigned int &o = *(oCoordinates[0]);
93  unsigned int &bo = *(oCoordinates[1]);
94  unsigned int &co = *(oCoordinates[2]);
95  unsigned int &ho = *(oCoordinates[3]);
96  unsigned int &wo = *(oCoordinates[4]);
97 
98  // Stack tensors
99  for(; i < iNumTensors; ++(i))
100  {
101  for(bi = 0; bi < iBatchSize; ++(bi))
102  {
103  for(ci = 0; ci < iChannels; ++(ci))
104  {
105  for(hi = 0; hi < iHeight; ++(hi))
106  {
107  for(wi = 0; wi < iWidth; ++(wi))
108  {
109  output[o * oWidth * oHeight * oChannels * oBatchSize +
110  bo * oWidth * oHeight * oChannels +
111  co * oWidth * oHeight +
112  ho * oWidth +
113  wo];
114 
115  output.Set(inputs[i]->Get());
116 
117  ++(*(inputs[i]));
118  }
119  }
120  }
121  }
122  }
123 }

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), StackDescriptor::m_Axis, QueueDescriptor::m_Inputs, StackDescriptor::m_NumInputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

◆ StrEqual()

constexpr bool armnn::StrEqual ( const char *  strA,
const char(&)  strB[N] 
)
constexpr

Definition at line 201 of file TypesUtils.hpp.

202 {
203  bool isEqual = true;
204  for (unsigned i = 0; isEqual && (i < N); ++i)
205  {
206  isEqual = (strA[i] == strB[i]);
207  }
208  return isEqual;
209 }

Referenced by ParseComputeDevice().
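
Because StrEqual() is constexpr, device-name comparisons such as the one in ParseComputeDevice() can also be done at compile time. A minimal sketch, assuming armnn/TypesUtils.hpp is the public header that provides it:

#include <armnn/TypesUtils.hpp>

// StrEqual is constexpr, so string comparisons against literals can be checked at compile time.
static_assert(armnn::StrEqual("CpuRef", "CpuRef"), "expected equal strings");
static_assert(!armnn::StrEqual("CpuAcc", "CpuRef"), "expected different strings");

int main() { return 0; }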

◆ StridedSlice()

void StridedSlice ( const TensorInfo &  inputInfo,
const StridedSliceDescriptor &  params,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 90 of file StridedSlice.cpp.

95 {
96  if (inputData == nullptr)
97  {
98  throw armnn::InvalidArgumentException("Slice: Null inputData pointer");
99  }
100  if (outputData == nullptr)
101  {
102  throw armnn::InvalidArgumentException("Slice: Null outputData pointer");
103  }
104 
105  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
106  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
107 
108  const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
109 
110  StridedSliceDescriptor paddedParams = params;
111 
112  // Pad parameters to 4 dimensions
113  PadParams(paddedParams, 4);
114 
115  const int start0 = paddedParams.GetStartForAxis(inputShape, 0);
116  const int stop0 = paddedParams.GetStopForAxis (inputShape, 0, start0);
117 
118  const int start1 = paddedParams.GetStartForAxis(inputShape, 1);
119  const int stop1 = paddedParams.GetStopForAxis (inputShape, 1, start1);
120 
121  const int start2 = paddedParams.GetStartForAxis(inputShape, 2);
122  const int stop2 = paddedParams.GetStopForAxis (inputShape, 2, start2);
123 
124  const int start3 = paddedParams.GetStartForAxis(inputShape, 3);
125  const int stop3 = paddedParams.GetStopForAxis (inputShape, 3, start3);
126 
127  const int step = armnn::numeric_cast<int>(dataTypeSize);
128 
129  for (int in0 = start0;
130  !LoopCondition(in0, stop0, paddedParams.m_Stride[0]);
131  in0 += paddedParams.m_Stride[0])
132  {
133  for (int in1 = start1;
134  !LoopCondition(in1, stop1, paddedParams.m_Stride[1]);
135  in1 += paddedParams.m_Stride[1])
136  {
137  for (int in2 = start2;
138  !LoopCondition(in2, stop2, paddedParams.m_Stride[2]);
139  in2 += paddedParams.m_Stride[2])
140  {
141  for (int in3 = start3;
142  !LoopCondition(in3, stop3, paddedParams.m_Stride[3]);
143  in3 += paddedParams.m_Stride[3])
144  {
145  int dim1 = armnn::numeric_cast<int>(inputShape[1]);
146  int dim2 = armnn::numeric_cast<int>(inputShape[2]);
147  int dim3 = armnn::numeric_cast<int>(inputShape[3]);
148 
149  int inputOffset = (((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3) * step;
150  ::memcpy(output, input + inputOffset, dataTypeSize);
151  output += step;
152  }
153  }
154  }
155  }
156 }

References TensorInfo::GetShape(), StridedSliceDescriptor::GetStartForAxis(), StridedSliceDescriptor::GetStopForAxis(), and StridedSliceDescriptor::m_Stride.
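
A minimal sketch of calling this reference-backend helper directly (tensor contents and shapes are illustrative; in practice it is reached through the reference StridedSlice workload). The descriptor fields m_Begin, m_End and m_Stride hold the per-axis begin, end and stride values:

    // Take every second element of a 1x4 Float32 tensor.
    std::vector<float> input = {0.f, 1.f, 2.f, 3.f};
    std::vector<float> output(2, 0.f);

    armnn::TensorInfo inputInfo({1, 4}, armnn::DataType::Float32);

    armnn::StridedSliceDescriptor desc;
    desc.m_Begin  = {0, 0};
    desc.m_End    = {1, 4};
    desc.m_Stride = {1, 2};

    armnn::StridedSlice(inputInfo, desc, input.data(), output.data(), sizeof(float));
    // output should now hold {0.f, 2.f}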

◆ StringToLogLevel()

LogSeverity armnn::StringToLogLevel ( std::string  level)
inline

Definition at line 43 of file Logging.hpp.

44 {
45  // Transfer to lower case
46  std::transform(level.begin(), level.end(), level.begin(),
47  [](unsigned char c){ return std::tolower(c); }
48  );
49 
50  if (level == "trace")
51  {
52  return LogSeverity::Trace;
53  }
54  else if (level == "debug")
55  {
56  return LogSeverity::Debug;
57  }
58  else if (level == "info")
59  {
60  return LogSeverity::Info;
61  }
62  else if (level == "warning")
63  {
64  return LogSeverity::Warning;
65  }
66  else if (level == "error")
67  {
68  return LogSeverity::Error;
69  }
70  else if (level == "fatal")
71  {
72  return LogSeverity::Fatal;
73  }
74  else
75  {
76  throw armnn::Exception("Unknown severity level for logging: '" + level +
77  "'. Valid options: trace, debug, info, warning, error, fatal");
78  }
79 }

References Debug, Error, Fatal, Info, Trace, and Warning.
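
A minimal sketch (the "warning" value is illustrative): parse a user-supplied severity string and feed it into the logging setup, noting that any unrecognised string makes StringToLogLevel() throw armnn::Exception.

    armnn::LogSeverity level = armnn::StringToLogLevel("warning");
    armnn::SetAllLoggingSinks(true, false, false);   // standard out only
    armnn::SetLogFilter(level);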

◆ swap() [1/2]

void armnn::swap ( OriginsDescriptor first,
OriginsDescriptor second 
)

Definition at line 356 of file Descriptors.cpp.

357 {
358  using std::swap;
359  swap(first.m_NumViews, second.m_NumViews);
360  swap(first.m_NumDimensions, second.m_NumDimensions);
361  swap(first.m_ViewOrigins, second.m_ViewOrigins);
362  swap(first.m_ConcatAxis, second.m_ConcatAxis);
363 }

References swap().

Referenced by BackendId::operator=(), SquashEqualSiblingsImpl< Comparable >::Run(), BackendRegistry::Swap(), and swap().
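
A minimal sketch (the view counts are illustrative): providing this free function lets OriginsDescriptor participate in the standard swap idiom via argument-dependent lookup.

    armnn::OriginsDescriptor a(/*numViews=*/2, /*numDimensions=*/4);
    armnn::OriginsDescriptor b(/*numViews=*/3, /*numDimensions=*/4);

    using std::swap;
    swap(a, b);   // resolves to armnn::swap, exchanging views, dimensions, origins and concat axis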

◆ swap() [2/2]

void armnn::swap ( ViewsDescriptor first,
ViewsDescriptor second 
)

Definition at line 365 of file Descriptors.cpp.

366 {
367  using std::swap;
368  swap(first.m_Origins, second.m_Origins);
369  swap(first.m_ViewSizes, second.m_ViewSizes);
370  swap(first.m_IsAxisSet, second.m_IsAxisSet);
371  swap(first.m_Axis, second.m_Axis);
372 }

References swap().

Referenced by swap().

◆ Tile()

void Tile ( const TileDescriptor params,
const TensorInfo inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 45 of file Tile.cpp.

49 {
50  // Input and output will always have same rank
51  uint32_t rank = inputInfo.GetNumDimensions();
52 
53  TensorShape inputShape = inputInfo.GetShape();
54 
55  std::vector<uint32_t> outputShape(rank);
56  for (uint32_t i = 0; i < rank; ++i)
57  {
58  outputShape[i] = inputShape[i] * params.m_Multiples[i];
59  }
60 
61  // If all values of multiples are 1, then return the input
62  if ( std::adjacent_find( params.m_Multiples.begin(), params.m_Multiples.end(),
63  std::not_equal_to<>() ) == params.m_Multiples.end() && params.m_Multiples[0] == 1)
64  {
65  for (uint32_t idx = 0; idx < inputInfo.GetNumElements(); ++idx)
66  {
67  float inputValue = inputDecoder.Get();
68  ++inputDecoder;
69  outputEncoder.Set(inputValue);
70  ++outputEncoder;
71  }
72  return;
73  }
74 
75  std::vector<float> inputData = inputDecoder.DecodeTensor(inputInfo.GetShape());
76  std::vector<float> outputData;
77  auto outputNumElements = inputData.size() * static_cast<uint32_t>(std::accumulate(begin(params.m_Multiples),
78  end(params.m_Multiples),
79  1,
80  std::multiplies<>()));
81  outputData.reserve(outputNumElements);
82 
83  for (uint32_t outputIndex = 0; outputIndex < outputNumElements; ++outputIndex)
84  {
85  std::vector<uint32_t> outputCoords = IndexToCoordinates(outputShape, outputIndex);
86 
87  // Converting output coordinates to input coordinates using modulus
88  std::vector<uint32_t> inputCoordinates;
89  inputCoordinates.reserve(rank);
90  for (uint32_t i = 0; i < rank; ++i)
91  {
92  inputCoordinates.push_back(outputCoords[i] % inputShape[i]);
93  }
94 
95  uint32_t inputIndex = CoordinatesToIndex(inputShape, inputCoordinates);
96 
97  outputEncoder[outputIndex];
98  outputEncoder.Set(inputData[inputIndex]);
99  }
100 }

References CoordinatesToIndex(), Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IndexToCoordinates(), TileDescriptor::m_Multiples, and Encoder< IType >::Set().
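
A minimal sketch of the index mapping used by the general path (shapes and values are illustrative, and the armnn decoders/encoders are left out): an input of shape [2] tiled with m_Multiples = {3} produces an output of shape [6], where each output coordinate maps back to the input through a per-axis modulus.

    std::vector<float> input = {10.f, 20.f};
    std::vector<float> output(6, 0.f);
    for (uint32_t outputIndex = 0; outputIndex < output.size(); ++outputIndex)
    {
        // outputCoord % inputDim, specialised to one dimension
        uint32_t inputIndex = outputIndex % static_cast<uint32_t>(input.size());
        output[outputIndex] = input[inputIndex];
    }
    // output should now hold {10, 20, 10, 20, 10, 20}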

◆ TopKSort()

void TopKSort ( unsigned int  k,
unsigned int *  indices,
const float *  values,
unsigned int  numElement 
)

Definition at line 25 of file DetectionPostProcess.cpp.

26 {
27  std::partial_sort(indices, indices + k, indices + numElement,
28  [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
29 }

Referenced by DetectionPostProcess(), and NonMaxSuppression().
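
A minimal sketch (the scores are illustrative): the indices array must already contain the candidate indices (DetectionPostProcess builds it with GenerateRangeK()); after the call the first k entries refer to the highest-scoring elements in descending order.

    std::vector<float> scores = {0.1f, 0.9f, 0.4f, 0.7f};
    std::vector<unsigned int> indices = {0, 1, 2, 3};

    armnn::TopKSort(2, indices.data(), scores.data(),
                    static_cast<unsigned int>(indices.size()));
    // indices should now start with {1, 3}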

◆ TosaRefBackendId()

constexpr const char* armnn::TosaRefBackendId ( )
constexpr

Definition at line 10 of file TosaRefBackendId.hpp.

10 { return "TosaRef"; }

Referenced by TosaRefBackend::GetIdStatic().

◆ TosaRefPreCompiledWorkloadValidate()

bool TosaRefPreCompiledWorkloadValidate ( std::string *  )

Definition at line 166 of file TosaRefPreCompiledWorkload.cpp.

167 {
168  return true;
169 }

◆ TosaRefTensorHandleFactoryId()

constexpr const char* armnn::TosaRefTensorHandleFactoryId ( )
constexpr

Definition at line 15 of file TosaRefTensorHandleFactory.hpp.

15 { return "Arm/TosaRef/TensorHandleFactory"; }

Referenced by TosaRefTensorHandleFactory::GetIdStatic().

◆ TransposeConvolution2dImpl()

void TransposeConvolution2dImpl ( const TransposeConvolution2dDescriptor descriptor,
const TensorShape inputShape,
Decoder< float > &  inputDecoder,
const TensorShape outputShape,
Encoder< float > &  outputEncoder,
const TensorShape weightsShape,
Decoder< float > &  weightsDecoder,
Decoder< float > *  biasesDecoder 
)

Definition at line 15 of file TransposeConvolution2d.cpp.

23 {
24  if (descriptor.m_BiasEnabled && !biasesDecoder)
25  {
26  throw InvalidArgumentException("Biases enabled but no bias data provided");
27  }
28  const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
29  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
30  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
31  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
32 
33  const unsigned int numBatches = inputShape[0];
34 
35  const unsigned int inputWidth = inputShape[widthIndex];
36  const unsigned int inputHeight = inputShape[heightIndex];
37  const unsigned int inputDepth = inputShape[channelsIndex];
38 
39  const unsigned int weightsHeight = weightsShape[heightIndex];
40  const unsigned int weightsWidth = weightsShape[widthIndex];
41  const unsigned int weightsDepth = weightsShape[channelsIndex];
42 
43  const unsigned int outputHeight = outputShape[heightIndex];
44  const unsigned int outputWidth = outputShape[widthIndex];
45  const unsigned int outputDepth = outputShape[channelsIndex];
46 
47  const unsigned int paddingLeft = descriptor.m_PadLeft;
48  const unsigned int paddingTop = descriptor.m_PadTop;
49 
50  const unsigned int strideX = descriptor.m_StrideX;
51  const unsigned int strideY = descriptor.m_StrideY;
52 
53  std::vector<float> outputBuffer(outputShape.GetNumElements(), 0);
54 
55  const std::vector<float> inputVec = inputDecoder.DecodeTensor(inputShape);
56  const std::vector<float> filterVec = weightsDecoder.DecodeTensor(weightsShape);
57 
58  for (unsigned int batch = 0u; batch < numBatches; ++batch)
59  {
60  for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
61  {
62  for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
63  {
64  unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
65  unsigned int yOutputOrigin = yInput * strideY - paddingTop;
66 
67  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
68  {
69  for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
70  {
71  for (unsigned int xWeights = 0u; xWeights < weightsWidth; ++xWeights)
72  {
73  unsigned int yOutput = yOutputOrigin + yWeights;
74  unsigned int xOutput = xOutputOrigin + xWeights;
75 
76  if (yOutput < outputHeight && xOutput< outputWidth)
77  {
78  for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
79  {
80  unsigned int inputIndex;
81  unsigned int outputIndex;
82  unsigned int weightsIndex;
83 
84  if(descriptor.m_DataLayout == armnn::DataLayout::NHWC)
85  {
86  inputIndex = batch * inputHeight * inputWidth * inputDepth +
87  yInput * inputWidth * inputDepth +
88  xInput * inputDepth +
89  dInput;
90 
91  weightsIndex = dOutput * weightsHeight * weightsWidth * weightsDepth +
92  yWeights * weightsWidth * weightsDepth +
93  xWeights * weightsDepth +
94  dInput;
95 
96  outputIndex = batch * outputHeight * outputWidth * outputDepth +
97  yOutput * outputWidth * outputDepth +
98  xOutput * outputDepth +
99  dOutput;
100  }
101  else
102  {
103  inputIndex = batch * inputDepth * inputHeight * inputWidth +
104  dInput * inputHeight * inputWidth +
105  yInput * inputWidth +
106  xInput;
107 
108  weightsIndex = dOutput * weightsDepth * weightsHeight * weightsWidth +
109  dInput * weightsHeight * weightsWidth +
110  yWeights * weightsWidth +
111  xWeights;
112 
113  outputIndex = batch * outputDepth * outputHeight * outputWidth +
114  dOutput * outputHeight * outputWidth +
115  yOutput * outputWidth +
116  xOutput;
117  }
118 
119  outputBuffer[outputIndex] += inputVec[inputIndex] * filterVec[weightsIndex];
120  }
121  }
122  }
123  }
124 
125  }
126  }
127  }
128  }
129 
130  // Apply bias (if enabled)
131  if (descriptor.m_BiasEnabled)
132  {
133  outputEncoder[0];
134  Decoder<float>& rBiasesDecoder = *biasesDecoder;
135 
136  for (unsigned int batch = 0u; batch < numBatches; ++batch)
137  {
138  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
139  {
140  rBiasesDecoder[dOutput];
141  for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
142  {
143  for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
144  {
145  const unsigned int outputIndex =
146  dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
147  outputBuffer[outputIndex] += rBiasesDecoder.Get();
148  }
149  }
150  }
151  }
152  }
153  outputEncoder[0];
154  for (float output : outputBuffer)
155  {
156  outputEncoder.Set(output);
157  ++outputEncoder;
158  }
159 }

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorShape::GetNumElements(), DataLayoutIndexed::GetWidthIndex(), TransposeConvolution2dDescriptor::m_BiasEnabled, TransposeConvolution2dDescriptor::m_DataLayout, TransposeConvolution2dDescriptor::m_PadLeft, TransposeConvolution2dDescriptor::m_PadTop, TransposeConvolution2dDescriptor::m_StrideX, TransposeConvolution2dDescriptor::m_StrideY, NHWC, and Encoder< IType >::Set().
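
The scatter pattern follows from the output-origin computation on lines 64-65: each input position (xInput, yInput) accumulates into the output window whose top-left corner is (xInput * strideX - paddingLeft, yInput * strideY - paddingTop) and whose extent equals the kernel width and height. As a worked example with illustrative values, strideX = 2, paddingLeft = 1 and a 3-wide kernel mean that input column xInput = 3 contributes to output columns 5, 6 and 7 (subject to outputWidth). Origins that would be negative wrap around in the unsigned arithmetic, but only in-range (yOutput, xOutput) positions pass the bounds check on line 76, so the result is unaffected.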

◆ TrueFunc()

bool armnn::TrueFunc ( Optional< std::string & >  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 54 of file LayerSupportCommon.hpp.

55 {
56  IgnoreUnused(reasonIfUnsupported);
57  IgnoreUnused(params...);
58  return true;
59 }

References IgnoreUnused().

◆ ValidateSourcesMatchOptimizedNetwork()

void armnn::ValidateSourcesMatchOptimizedNetwork ( std::vector< BackendOptions optimizedOptions,
const INetworkProperties networkProperties 
)

This function performs a sanity check to ensure that the combination of input and output memory source matches the values for importEnabled and exportEnabled that were specified during optimization.

During optimization the tensor handle factories are chosen based on whether import and export are enabled. If the user then specifies an incompatible memory source here, the mismatch would only surface as failures at execution time, so this check throws InvalidArgumentException up front.

Parameters
optimizedOptions
networkProperties

Definition at line 98 of file LoadedNetwork.cpp.

100 {
101  // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
102  // added as backend options.
103  const vector<BackendOptions>::iterator& backendItr =
104  find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
105  if (backend.GetBackendId().Get() == "Global")
106  {
107  return true;
108  }
109  else
110  {
111  return false;
112  }
113  });
114  bool importEnabled = false;
115  bool exportEnabled = false;
116  if (backendItr != optimizedOptions.end())
117  {
118  // Find the importEnabled and exportEnabled values.
119  for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
120  {
121  const BackendOptions::BackendOption& option = backendItr->GetOption(i);
122  if (option.GetName() == "ImportEnabled")
123  {
124  importEnabled = option.GetValue().AsBool();
125  }
126  if (option.GetName() == "ExportEnabled")
127  {
128  exportEnabled = option.GetValue().AsBool();
129  }
130  }
131  }
132 
133  // Now that we have values for import and export compare them to the MemorySource variables.
134  // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
135  if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
136  (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
137  {
138  auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
139  if (!importEnabled)
140  {
141  message.append(" requires that memory import be enabled. However, "
142  "it was disabled when this network was optimized.");
143  }
144  else
145  {
146  message.append(" requires that memory import be disabled. However, "
147  "it was enabled when this network was optimized.");
148  }
149  throw InvalidArgumentException(message);
150  }
151 
152  if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
153  (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
154  {
155  auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
156  if (!exportEnabled)
157  {
158  message.append(" requires that memory export be enabled. However, "
159  "it was disabled when this network was optimized.");
160  }
161  else
162  {
163  message.append(" requires that memory export be disabled. However, "
164  "it was enabled when this network was optimized.");
165  }
166  throw InvalidArgumentException(message);
167  }
168 } // anonymous
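
A minimal sketch of keeping the two sides consistent (the OptimizerOptionsOpaque setters and the INetworkProperties constructor shown are assumptions based on recent ArmNN releases rather than anything on this page): enable import at optimize time only when the input MemorySource passed at load time will be something other than Undefined, and treat export/outputs the same way.

    armnn::OptimizerOptionsOpaque optimizerOptions;
    optimizerOptions.SetImportEnabled(true);    // inputs will be imported
    optimizerOptions.SetExportEnabled(false);   // outputs will be copied out

    // ... Optimize(...) the network with optimizerOptions ...

    // Matching load-time properties: non-Undefined input source, Undefined output source.
    armnn::INetworkProperties networkProperties(/*asyncEnabled=*/false,
                                                armnn::MemorySource::Malloc,
                                                armnn::MemorySource::Undefined);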

◆ VerifyClContextBuffer()

bool armnn::VerifyClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 157 of file ClContextSchema_generated.h.

158  {
159  return verifier.VerifyBuffer<armnn::ClContext>(ClContextIdentifier());
160 }

References ClContextIdentifier().
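
A minimal sketch (LoadCachedNetworkFile is a hypothetical helper that returns the serialized blob): run the flatbuffers verifier over a cached CL context before attempting to deserialize it.

    std::vector<uint8_t> blob = LoadCachedNetworkFile();   // hypothetical helper

    flatbuffers::Verifier verifier(blob.data(), blob.size());
    if (!armnn::VerifyClContextBuffer(verifier))
    {
        throw armnn::Exception("CL context binary failed verification");
    }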

◆ VerifySizePrefixedClContextBuffer()

bool armnn::VerifySizePrefixedClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 162 of file ClContextSchema_generated.h.

163  {
164  return verifier.VerifySizePrefixedBuffer<armnn::ClContext>(ClContextIdentifier());
165 }

References ClContextIdentifier().

◆ VerifyTensorInfoDataType()

void armnn::VerifyTensorInfoDataType ( const armnn::TensorInfo info,
armnn::DataType  dataType 
)
inline

Definition at line 382 of file TypesUtils.hpp.

383 {
384  if (info.GetDataType() != dataType)
385  {
386  std::stringstream ss;
387  ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
388  << " for tensor:" << info.GetShape()
389  << ". The type expected to be: " << armnn::GetDataTypeName(dataType);
390  throw armnn::Exception(ss.str());
391  }
392 }

References GetDataTypeName(), and info.
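
A minimal sketch (shape and types are illustrative): the call is a no-op when the types match and throws armnn::Exception otherwise.

    armnn::TensorInfo info({1, 3}, armnn::DataType::Float32);
    armnn::VerifyTensorInfoDataType(info, armnn::DataType::Float32);    // passes
    // armnn::VerifyTensorInfoDataType(info, armnn::DataType::QAsymmU8); // would throw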

◆ WrapClError()

RuntimeException WrapClError ( const cl::Error &  clError,
const CheckLocation location 
)
inline

Definition at line 160 of file ClWorkloadUtils.hpp.

161 {
162  std::stringstream message;
163  message << "CL error: " << clError.what() << ". Error code: " << clError.err();
164 
165  return RuntimeException(message.str(), location);
166 }

References Exception::what().

Referenced by RunClFunction().
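
A minimal sketch (submitClWork is a hypothetical callable that issues OpenCL work): catch the compute library's cl::Error and rethrow it as an armnn::RuntimeException carrying the call site, which is what RunClFunction() does internally.

    try
    {
        submitClWork();   // hypothetical OpenCL call
    }
    catch (const cl::Error& clError)
    {
        throw armnn::WrapClError(clError, CHECK_LOCATION());
    }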

Variable Documentation

◆ cpuAccCapabilities

const BackendCapabilities cpuAccCapabilities("CpuAcc", { {"NonConstWeights", true}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} })

◆ cpuRefCapabilities

const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true}, {"HasFp16", true} })
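
A minimal sketch of reading one of these capability sets (the direct use of cpuRefCapabilities is illustrative, since it lives in the reference backend's headers and is normally reached via IBackendInternal::GetCapabilities()): BackendCapabilities is a BackendOptions instance, so individual flags can be walked with GetOptionCount()/GetOption(), mirroring the pattern used in ValidateSourcesMatchOptimizedNetwork() above.

    bool asyncSupported = false;
    for (size_t i = 0; i < armnn::cpuRefCapabilities.GetOptionCount(); ++i)
    {
        const armnn::BackendOptions::BackendOption& option = armnn::cpuRefCapabilities.GetOption(i);
        if (option.GetName() == "AsyncExecution")
        {
            asyncSupported = option.GetValue().AsBool();
        }
    }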

◆ EXPIRE_RATE

constexpr unsigned int EXPIRE_RATE = 3U
constexpr

Variable to control expire rate of priority queue.

Definition at line 37 of file Types.hpp.

◆ g_AggregateProfilingEventsByInference

constexpr bool g_AggregateProfilingEventsByInference = true
constexpr

Definition at line 37 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults().

◆ g_ProfilingEventCountHint

constexpr std::size_t g_ProfilingEventCountHint = 1024
constexpr

Definition at line 29 of file Profiling.cpp.

Referenced by ProfilerImpl::ProfilerImpl().

◆ g_WriteProfilingEventSequence

constexpr bool g_WriteProfilingEventSequence = true
constexpr

Definition at line 32 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

◆ g_WriteReportToStdOutOnProfilerDestruction

constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
constexpr

Definition at line 41 of file Profiling.cpp.

Referenced by ProfilerImpl::~ProfilerImpl().

◆ gpuFsaCapabilities

const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })

◆ LOWEST_CAPTURE_PERIOD

constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
constexpr

The lowest performance data capture interval we support is 10 milliseconds.

Definition at line 34 of file Types.hpp.

◆ MaxNumOfTensorDimensions

constexpr unsigned int MaxNumOfTensorDimensions = 5U
constexpr

Definition at line 31 of file Types.hpp.

◆ oldCpuRefCapabilities

const std::set<armnn::BackendCapability> oldCpuRefCapabilities
Initial value:

Definition at line 25 of file RefBackend.hpp.

◆ paddingRequiredLayers

const std::set<armnn::LayerType> paddingRequiredLayers
Initial value:
{
LayerType::Convolution2d,
LayerType::DepthwiseConvolution2d,
LayerType::Lstm,
LayerType::Mean,
LayerType::QuantizedLstm,
LayerType::TransposeConvolution2d
}

Definition at line 16 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetCapabilities().

◆ tl_Profiler

thread_local IProfiler* tl_Profiler = nullptr

◆ wordSize

constexpr size_t wordSize = sizeof(size_t) * 8
constexpr

Definition at line 22 of file SingleAxisPriorityList.cpp.

ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
armnn::ArgMinMaxFunction::Max
@ Max
armnn::TuningLevel::Exhaustive
@ Exhaustive
armnn::MemorySource::Malloc
@ Malloc
armnn::CapabilityClass::FallbackImportDisabled
@ FallbackImportDisabled
armnnUtils::Transpose
void Transpose(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Transpose.cpp:153
armnn::PaddingMode::Symmetric
@ Symmetric
armnn::NeonMinimumWorkloadValidate
arm_compute::Status NeonMinimumWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
Validate function for validating the inputs and output.
Definition: NeonMinimumWorkload.cpp:15
armnn::GetBinaryOperationAsCString
constexpr char const * GetBinaryOperationAsCString(BinaryOperation operation)
Definition: TypesUtils.hpp:76
IS_MULTI_AXES_REDUCE_SUPPORTED
#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)
Macro function check if layer with multiple axes is supported on each backend.
Definition: ArmComputeUtils.hpp:400
armnn::optimizations::InsertDebugToFileLayer
OptimizeForType< Layer, AddDebugToFileImpl > InsertDebugToFileLayer
Definition: AddDebug.hpp:54
VectorVectorCwiseProduct
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:187
armnn::IOptimizedNetworkPtr
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:340
armnn::MemBlockStrategyType::MultiAxisPacking
@ MultiAxisPacking
armnn::ApplyBackendOptimizations
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1328
armnn::Compute::Undefined
@ Undefined
armnn::BinaryOperation::Mul
@ Mul
armnn::DataType::Boolean
@ Boolean
armnn::Pooling2dDescriptor::m_PaddingMethod
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
Definition: Descriptors.hpp:425
armnn::optimizations::InsertDebugLayer
OptimizeForType< Layer, AddDebugImpl > InsertDebugLayer
Definition: AddDebug.hpp:53
armnn::GenerateRangeK
std::vector< unsigned int > GenerateRangeK(unsigned int k)
Definition: DetectionPostProcess.cpp:18
armnn::LayerType::Permute
@ Permute
armnn::optimizations::FuseBatchNormIntoConvolution2DFloat32
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoConvolution2DFloat32
Definition: FuseBatchNorm.hpp:222
armnn::NormalizationAlgorithmChannel::Within
@ Within
armnn::BinaryOperation::Add
@ Add
armnn::NeonAdditionWorkloadValidate
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonAdditionWorkload.cpp:20
armnn::BackendOptions::Var::AsInt
int AsInt() const
Definition: BackendOptions.hpp:120
armnn::Convert1HWOTensorInfoToAcl
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function coverts a TensorInfo...
Definition: WorkloadUtils.cpp:176
CONSTRUCT_IN_PLACE
#define CONSTRUCT_IN_PLACE
Definition: Optional.hpp:41
armnn::ConvertActivationDescriptorToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
Definition: ArmComputeUtils.hpp:94
armnn::ComparisonOperation::LessOrEqual
@ LessOrEqual
armnn::MakeInfo
arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor &descriptor)
Definition: NeonDetectionPostProcessWorkload.cpp:17
armnn::GetLayerTypeAsCString
const char * GetLayerTypeAsCString(LayerType type)
Definition: InternalTypes.cpp:13
armnn::Encoder::Set
virtual void Set(IType right)=0
armnn::NeonMultiplicationWorkloadValidate
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonMultiplicationWorkload.cpp:19
armnn::Activation
void Activation(Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
Definition: Activation.cpp:102
armnn::DataLayout::NCDHW
@ NCDHW
armnn::SetClStridedSliceData
auto SetClStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
Definition: ClWorkloadUtils.hpp:79
armnn::IsQuantized8BitType
constexpr bool IsQuantized8BitType(DataType dataType)
Definition: TypesUtils.hpp:316
armnn::Compute::GpuAcc
@ GpuAcc
GPU Execution: OpenCL: ArmCompute.
armnn::ActivationFunction::LeakyReLu
@ LeakyReLu
MeanStddevNormalization
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
Definition: LstmUtils.cpp:40
VectorBatchVectorCwiseProductAccumulate
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:131
armnn::MemorySource::Gralloc
@ Gralloc
armnn::DataLayout
DataLayout
Definition: Types.hpp:62
armnn::InsertConvertFp16ToFp32LayersBefore
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
Definition: NetworkUtils.cpp:40
armnn::Pooling3dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:479
armnn::SpaceToBatchNdDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:1071
armnn::Splitter
void Splitter(const SplitterQueueDescriptor &data, std::vector< ITensorHandle * > inputs, std::vector< ITensorHandle * > outputs)
Definition: Splitter.hpp:17
armnn::Append
void Append(Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
Definition: Optimizer.hpp:36
armnn::CollapseLeadingUnitDimensions
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
Definition: NeonBackendOptimizationUtils.hpp:14
armnn::DepthwiseConvolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:708
armnn::Pooling2dDescriptor::m_PoolHeight
uint32_t m_PoolHeight
Pooling height value.
Definition: Descriptors.hpp:417
armnn::QosExecPriority::Medium
@ Medium
armnn::optimizations::OptimizeInversePermutes
OptimizeForConnection< PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl< PermuteLayer > > OptimizeInversePermutes
Definition: OptimizeInversePermutes.hpp:43
armnn::DataLayout::NHWC
@ NHWC
armnn::ActivationFunction::SoftReLu
@ SoftReLu
armnn::ConvertResizeMethodToAclInterpolationPolicy
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
Definition: ArmComputeUtils.hpp:213
armnn::optimizations::TransposeAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< TransposeLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< TransposeLayer > > TransposeAndBatchToSpaceAsDepthToSpace
Definition: PermuteAndBatchToSpaceAsDepthToSpace.hpp:104
armnn::Convolution3dDescriptor::m_PadFront
uint32_t m_PadFront
Padding front value in the depth dimension.
Definition: Descriptors.hpp:637
VectorBatchVectorAdd
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:16
armnn::EdgeStrategy::DirectCompatibility
@ DirectCompatibility
No strategy has been defined. Used internally to verify integrity of optimizations.
armnn::CalculateSlotOption
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool exportEnabled)
Definition: Network.cpp:1573
armnn::TuningLevel::None
@ None
armnn::Pooling2d
void Pooling2d(Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
Computes the Pooling2d operation.
Definition: Pooling2d.cpp:142
armnn::LogSeverity::Trace
@ Trace
armnn::BackendCapability::NonConstWeights
@ NonConstWeights
Constant weights can be accessed through the descriptors, On the other hand, non-const weights can be...
armnn::optimizations::FoldPadIntoPooling2d
OptimizeForExclusiveConnection< PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl > FoldPadIntoPooling2d
Definition: FoldPadIntoLayer2d.hpp:283
armnn::Compute::CpuRef
@ CpuRef
CPU Execution: Reference C++ kernels.
armnn::NeonSubtractionWorkloadValidate
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonSubtractionWorkload.cpp:22
armnn::optimizations::Fp32NetworkToFp16Converter
OptimizeForType< Layer, ConvertFp32NetworkToFp16Impl > Fp32NetworkToFp16Converter
Definition: ConvertFp32NetworkToFp16.hpp:87
armnnUtils::GetUnsignedAxis
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
Definition: TensorUtils.cpp:236
armnn::ViewsDescriptor::HasAxis
bool HasAxis() const
Returns true if an axis has been set.
Definition: Descriptors.cpp:387
armnn::GetStatusAsCString
constexpr char const * GetStatusAsCString(Status status)
Definition: TypesUtils.hpp:21
armnn::ConvertAdditionalInfoToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
Definition: ArmComputeUtils.hpp:105
armnn::IsLayerOptimizable
bool IsLayerOptimizable(const armnn::Layer &layer)
Definition: MockBackend.cpp:99
armnn::TensorShape::GetDimensionSpecificity
bool GetDimensionSpecificity(unsigned int i) const
Gets information about if the dimension size has been specified or not.
Definition: Tensor.cpp:211
ARMNN_SCOPED_PROFILING_EVENT_CL
#define ARMNN_SCOPED_PROFILING_EVENT_CL(name)
Definition: ClWorkloadUtils.hpp:21
armnn::DepthwiseConvolution2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:710
armnn::ActivationFunction::Sqrt
@ Sqrt
armnn::GetTimeNow
std::chrono::high_resolution_clock::time_point GetTimeNow()
Definition: Timer.hpp:14
armnn::JsonObjectType::Measurement
@ Measurement
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::CreateProgram
flatbuffers::Offset< Program > CreateProgram(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
Definition: ClContextSchema_generated.h:118
armnn::optimizations::FoldPadIntoConvolution2d
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Definition: FoldPadIntoLayer2d.hpp:277
armnn::NeonMaximumWorkloadValidate
arm_compute::Status NeonMaximumWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
Definition: NeonMaximumWorkload.cpp:14
armnn::AllocateOutputData
void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
Definition: DetectionPostProcess.cpp:103
armnn::MemorySource::DmaBufProtected
@ DmaBufProtected
armnn::NormalizationAlgorithmMethod::LocalBrightness
@ LocalBrightness
Krichevsky 2012: Local Brightness Normalization.
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, LayerType type)
Definition: SubgraphUtils.hpp:362
armnn::BinaryOperation::Sub
@ Sub
armnn::PermuteTensor
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
Definition: WorkloadUtils.cpp:18
armnn::GetDataTypeName
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:233
armnn::SetNeonStridedSliceData
auto SetNeonStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
Definition: NeonWorkloadUtils.hpp:139
armnn::optimizations::ConvertConstDequantisationLayersToConstLayers
OptimizeForConnection< ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl > ConvertConstDequantisationLayersToConstLayers
Definition: ConvertConstDequantisationLayersToConstLayers.hpp:173
armnn::OutputShapeRounding::Floor
@ Floor
armnn::Pooling2dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:421
armnn::TensorInfo::GetNumDimensions
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
armnn::BatchToSpaceNdDescriptor::m_BlockShape
std::vector< unsigned int > m_BlockShape
Block shape values.
Definition: Descriptors.hpp:898
armnn::Reduce
void Reduce(const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
Definition: Reduce.cpp:70
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::SetLogFilter
void SetLogFilter(LogSeverity level)
Definition: Logging.cpp:73
armnnUtils::DataLayoutIndexed
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
Definition: DataLayoutIndexed.hpp:17
armnn::optimizations::MoveTransposeUp
OptimizeForConnection< Layer, TransposeLayer, MoveTransposeUpImpl > MoveTransposeUp
Definition: MoveTransposeUp.hpp:83
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
armnn::GraphEvent::LayerAdded
@ LayerAdded
armnn::DataType::Float32
@ Float32
armnn::GetDataLayoutName
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:253
armnnUtils::DataLayoutIndexed::GetDataLayout
armnn::DataLayout GetDataLayout() const
Definition: DataLayoutIndexed.hpp:22
armnn::ActivationFunction::TanH
@ TanH
VectorBatchVectorCwiseProduct
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:152
ClipVector
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:229
armnn::LogSoftmax
void LogSoftmax(Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
Definition: LogSoftmax.cpp:29
armnn::GpuFsaBackendId
constexpr const char * GpuFsaBackendId()
Definition: GpuFsaBackendId.hpp:10
armnn::LogicalBinaryOperation::LogicalOr
@ LogicalOr
armnn::optimizations::BroadcastToOptimizationLayer
OptimizeForType< BroadcastToLayer, DeleteBroadcastToImpl > BroadcastToOptimizationLayer
Definition: DeleteBroadcastTo.hpp:38
armnn::Pooling2dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:411
armnn::Convolution3dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:633
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::PoolingAlgorithm::L2
@ L2
armnn::SpaceToBatchNdDescriptor::m_BlockShape
std::vector< unsigned int > m_BlockShape
Block shape value.
Definition: Descriptors.hpp:1066
MatrixBatchVectorMultiplyAccumulate
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:87
armnn::Convolution3dDescriptor::m_DilationX
uint32_t m_DilationX
Dilation along x axis.
Definition: Descriptors.hpp:647
armnn::PaddingMode
PaddingMode
The padding mode controls whether the padding should be filled with constant values (Constant),...
Definition: Types.hpp:200
armnn::Convolution3dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:635
armnn::MaxNumOfTensorDimensions
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
armnn::DataType::QAsymmU8
@ QAsymmU8
armnn::QosExecPriority::High
@ High
armnn::LogSeverity::Info
@ Info
armnn::ActivationFunction::BoundedReLu
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
armnn::minimum
Definition: Minimum.hpp:12
armnn::optimizations::PermuteAsReshape
OptimizeForType< PermuteLayer, PermuteAsReshapeImpl > PermuteAsReshape
Definition: PermuteAsReshape.hpp:66
armnn::DataType::QSymmS8
@ QSymmS8
armnn::ReportWarning
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)
Definition: Network.cpp:768
armnnUtils::Permute
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:164
armnn::Half
half_float::half Half
Definition: Half.hpp:22
armnn::ComputeConv3DInfo
arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.
Definition: ArmComputeUtils.hpp:288
armnn::Pooling3dDescriptor::m_StrideZ
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
Definition: Descriptors.hpp:497
armnn::CreateClContext
flatbuffers::Offset< ClContext > CreateClContext(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
Definition: ClContextSchema_generated.h:57
armnn::FoldPadLayer
LayerType * FoldPadLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, PadLayer *padLayer)
Definition: SubgraphUtils.hpp:234
armnnUtils::Permuted
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:125
armnn::CopyArmComputeClTensorData
void CopyArmComputeClTensorData(arm_compute::CLTensor &dstTensor, const T *srcData)
Definition: ClWorkloadUtils.hpp:64
armnn::Pooling2dDescriptor::m_PoolWidth
uint32_t m_PoolWidth
Pooling width value.
Definition: Descriptors.hpp:415
armnn::Stack
void Stack(const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
Definition: Stack.cpp:12
armnn::UnaryOperation::Neg
@ Neg
armnn::optimizations::PermuteAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< PermuteLayer > > PermuteAndBatchToSpaceAsDepthToSpace
Definition: PermuteAndBatchToSpaceAsDepthToSpace.hpp:102
armnn::BatchToSpaceNd
void BatchToSpaceNd(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const BatchToSpaceNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: BatchToSpaceNd.cpp:50
armnn::Pooling3dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCDHW, NDHWC).
Definition: Descriptors.hpp:503
armnn::GetActivationFunctionAsCString
constexpr char const * GetActivationFunctionAsCString(ActivationFunction activation)
Definition: TypesUtils.hpp:31
ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
armnn::BatchToSpaceNdDescriptor::m_Crops
std::vector< std::pair< unsigned int, unsigned int > > m_Crops
The values to crop from the input dimension.
Definition: Descriptors.hpp:900
CopyVector
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:244
armnn::optimizations::PermuteDepthwiseConv2dWeights
OptimizeForType< Layer, PermuteDepthwiseConv2dWeightsImpl > PermuteDepthwiseConv2dWeights
Definition: PermuteDepthwiseConv2dWeights.hpp:78
armnn::SelectTensorHandleStrategy
OptimizationResult SelectTensorHandleStrategy(Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1812
armnn::IsNCHW
bool IsNCHW(armnn::Layer &layer)
Definition: SubgraphUtils.hpp:213
armnn::BoostLogSeverityMapping::error
@ error
armnn::Pooling3dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:481
armnn::Coordinates
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
Definition: InternalTypes.hpp:15
Sub1Vector
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
Definition: LstmUtils.cpp:173
armnn::TensorInfo::IsConstant
bool IsConstant() const
Definition: Tensor.cpp:509
armnn::ReduceOperation::Mean
@ Mean
armnn::ActivationFunction::HardSwish
@ HardSwish
armnn::DataType::QSymmS16
@ QSymmS16
armnn::ActivationFunction::Gelu
@ Gelu
armnn::numeric_cast
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
armnn::LayerType::ElementwiseBinary
@ ElementwiseBinary
armnn::DataType::BFloat16
@ BFloat16
armnn::optimizations::MovePermuteUp
OptimizeForConnection< Layer, PermuteLayer, MovePermuteUpImpl > MovePermuteUp
Definition: MovePermuteUp.hpp:83
armnn::FullyConnected
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.
Definition: FullyConnected.cpp:15
armnn::Pooling3dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:495
armnn::optimizations::OptimizeInverseConversionsFp16
OptimizeForConnection< ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp16
Definition: OptimizeInverseConversions.hpp:42
armnn::Pooling3d
void Pooling3d(Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
Computes the Pooling3d operation.
Definition: Pooling3d.cpp:172
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::NormalizationAlgorithmChannel::Across
@ Across
armnn::ClContextIdentifier
const char * ClContextIdentifier()
Definition: ClContextSchema_generated.h:148
armnn::GetOffset
unsigned int GetOffset(const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
Definition: SpaceToBatchNd.cpp:15
armnn::Combine
MemorySourceFlags Combine(Arg source, Args... rest)
Definition: MemorySources.hpp:36
armnnUtils::DataLayoutIndexed::GetHeightIndex
unsigned int GetHeightIndex() const
Definition: DataLayoutIndexed.hpp:24
armnn::ComparisonOperation::NotEqual
@ NotEqual
armnn::Compute
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
armnn::ComparisonOperation::GreaterOrEqual
@ GreaterOrEqual
armnn::FusedKernelType::AddMulAdd
@ AddMulAdd
armnnUtils::GetPerAxisParams
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
Definition: TensorUtils.cpp:280
armnn::LogSeverity::Error
@ Error
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
armnn::QosExecPriority::Low
@ Low
armnn::OptimizationResult::IsOk
bool IsOk() const
Definition: Network.hpp:273
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::DataLayout::NDHWC
@ NDHWC
armnn::EdgeStrategy::CopyToTarget
@ CopyToTarget
Source backends tensor data can be exported to destination backend tensor without copy.
armnn::Fill
void Fill(Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
Creates a tensor and fills it with a scalar value.
Definition: Fill.cpp:13
armnn::ViewsDescriptor::GetViewSizes
const uint32_t * GetViewSizes(uint32_t idx) const
Get the view sizes at the int value idx.
Definition: Descriptors.cpp:346
armnn::AllTypesAreEqualImpl
bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest)
Definition: LayerSupportRules.hpp:65
armnn::Pooling3dDescriptor::m_PoolType
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max. Average, L2).
Definition: Descriptors.hpp:473
CreateResizeAttributes
arm_compute::experimental::dynamic_fusion::ResizeAttributes CreateResizeAttributes(const armnn::ResizeDescriptor &descriptor)
Utility function used to setup an arm_compute::ResizeDescriptor object from given descriptor.
Definition: UtilsGpuFsa.cpp:64
armnn::CreateSupportedBackends
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
Definition: Network.cpp:1309
armnn::ClReduceWorkloadValidate
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
Definition: ClReduceWorkload.cpp:18
armnn::SpaceToBatchNdDescriptor::m_PadList
std::vector< std::pair< unsigned int, unsigned int > > m_PadList
Specifies the padding values for the input dimension: heightPad{top, bottom} widthPad{left,...
Definition: Descriptors.hpp:1069
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::ReverseV2
void ReverseV2(const TensorInfo &inputInfo, const TensorInfo &axisInfo, Decoder< float > &inputDecoder, Decoder< int > &axisDecoder, Encoder< float > &outputEncoder)
Definition: ReverseV2Impl.cpp:78
armnn::Convolution3dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:631
armnn::ConvertReductionOperationToAcl
arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor &descriptor)
Definition: ArmComputeUtils.hpp:333
armnn::NeonReduceWorkloadValidate
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
Definition: NeonReduceWorkload.cpp:19
armnn::IntersectionOverUnion
float IntersectionOverUnion(const float *boxI, const float *boxJ)
Definition: DetectionPostProcess.cpp:31
armnn::BackendOptions::Var::IsInt
bool IsInt() const
Definition: BackendOptions.hpp:113
armnn::BinaryOperation::Maximum
@ Maximum
armnn::BoostLogSeverityMapping::trace
@ trace
CreatePool2dAttributes
arm_compute::experimental::dynamic_fusion::Pool2dAttributes CreatePool2dAttributes(const Pooling2dDescriptor &descriptor)
Utility function used to setup an arm_compute::Pool2dAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:45
armnn::DataType::Float16
@ Float16
armnn::optimizations::ConvertConstantsFloatToHalf
ConvertConstants< Float32ToFloat16, IsFloat16Layer > ConvertConstantsFloatToHalf
Definition: ConvertConstants.hpp:99
ARMNN_POLYMORPHIC_CAST_CHECK
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
Definition: PolymorphicDowncast.hpp:27
armnn::AttemptBackendAssignment
OptimizationResult AttemptBackendAssignment(BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:847
armnn::Pooling3dDescriptor::m_PoolWidth
uint32_t m_PoolWidth
Pooling width value.
Definition: Descriptors.hpp:487
armnn::BinaryOperation::SqDiff
@ SqDiff
armnn::Pooling2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:427
armnn::QueueDescriptorWithParameters::m_Parameters
LayerDescriptor m_Parameters
Definition: WorkloadData.hpp:66
armnn::UnaryOperation::Rsqrt
@ Rsqrt
armnn::NeonDivisionWorkloadValidate
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: NeonDivisionWorkload.cpp:18
armnn::TensorShape::GetNumDimensions
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
armnn::DepthwiseConvolution2dDescriptor::m_DilationY
uint32_t m_DilationY
Dilation factor value for height dimension.
Definition: Descriptors.hpp:706
armnn::ComparisonOperation::Less
@ Less
CreateConv2dAttributes
Conv2dAttributes CreateConv2dAttributes(const Convolution2dDescriptor &descriptor)
Utility function used to setup an arm_compute::Conv2dAttributes object from given descriptor.
Definition: UtilsGpuFsa.cpp:14
armnn::UnaryOperation::LogicalNot
@ LogicalNot
armnn::Pooling2dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:413
armnn::ForEachLayerInput
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
Definition: SubgraphViewSelector.cpp:267
armnn::Slice
void Slice(const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: Slice.cpp:14
armnn::ViewsDescriptor::GetAxis
int32_t GetAxis() const
Get the axis value.
Definition: Descriptors.cpp:381
armnn::Pooling3dDescriptor::m_PaddingMethod
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
Definition: Descriptors.hpp:501
armnn::Pooling2dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:409
CreateDWConv2dAttributes
arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes CreateDWConv2dAttributes(const DepthwiseConvolution2dDescriptor &descriptor, const unsigned int aclDepthMultiplier)
Utility function used to setup an arm_compute::DepthwiseConv2dAttributes object from given descriptor...
Definition: UtilsGpuFsa.cpp:29
armnn::FactoryId
ITensorHandleFactory::FactoryId FactoryId
Definition: MockTensorHandleFactory.cpp:12
ARMNN_THROW_INVALIDARG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
Definition: Exceptions.hpp:212
armnn::InsertConvertFp32ToFp16LayersAfter
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter(Graph &graph, Layer &layer)
Definition: NetworkUtils.cpp:79
armnn::ReverseRelocateIdx
unsigned int ReverseRelocateIdx(unsigned int idx, unsigned int inputRank, std::vector< bool > &axisFlag, std::vector< unsigned int > &dimSize, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:49
armnn::UnaryOperation::Exp
@ Exp
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
armnn::BackendOptions::Var::IsString
bool IsString() const
Definition: BackendOptions.hpp:116
armnn::ArgMinMax
void ArgMinMax(Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
Definition: ArgMinMax.cpp:17
armnn::Convolution3dDescriptor::m_DilationZ
uint32_t m_DilationZ
Dilation along z axis.
Definition: Descriptors.hpp:651
armnn::MemorySource::DmaBuf
@ DmaBuf
armnn::PaddingMethod::Exclude
@ Exclude
The padding fields don't count and are ignored.
armnn::EmptyOptional
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
armnn::CalculateGatherNdKeyIndices
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
Definition: WorkloadUtils.cpp:312
armnn::ReverseGetMultIdx
std::vector< unsigned int > ReverseGetMultIdx(const unsigned int idx, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:16
armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition: FloatingPointConverter.cpp:43
armnn::DataType
DataType
Definition: Types.hpp:48
armnn::ReportError
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)
Definition: Network.cpp:756
armnn::LayerType::Softmax
@ Softmax
armnn::CheckFp16Support
bool CheckFp16Support(BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:1029
armnn::PolymorphicDowncast
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
Definition: PolymorphicDowncast.hpp:74
armnn::Dimensionality::Scalar
@ Scalar
armnn::ActivationFunction::Elu
@ Elu
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, BinaryOperation type)
Definition: SubgraphUtils.hpp:367
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::PaddingMethod::IgnoreValue
@ IgnoreValue
The padding fields count, but are ignored.
armnn::IndexToCoordinates
std::vector< uint32_t > IndexToCoordinates(std::vector< uint32_t > &shape, uint32_t index)
Definition: Tile.cpp:16
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::UnaryOperation::Sin
@ Sin
armnn::optimizations::FusePermuteIntoConstLayer
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
Definition: ConvertConstPermuteLayersToConstLayers.hpp:124
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:309
armnn::ConvertLstmActivationFuncToAclLayerInfo
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Definition: ArmComputeUtils.hpp:118
armnn::Dequantize
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an 8-bit data type into a floating point data type.
Definition: TypesUtils.cpp:52
armnn::ActivationFunction::Linear
@ Linear
armnn::Convolution3dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:629
armnn::EdgeStrategy
EdgeStrategy
Definition: ITensorHandleFactory.hpp:104
armnn::SetAllLoggingSinks
void SetAllLoggingSinks(bool standardOut, bool debugOut, bool coloured)
Definition: Logging.cpp:191
armnn::MakeOptimizations
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
armnn::BackendOptions::Var::AsBool
bool AsBool() const
Value getters.
Definition: BackendOptions.hpp:119
armnn::Decoder::DecodeTensor
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
armnn::Convolution3dDescriptor::m_StrideY
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
Definition: Descriptors.hpp:643
armnn::ReducedOutputOffset
unsigned int ReducedOutputOffset(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
Definition: Reduce.cpp:40
armnn::LogSeverity::Fatal
@ Fatal
armnn::ReduceOperation::Sum
@ Sum
armnn::RequiresCopy
bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1458
armnn::LayerType::Addition
@ Addition
ARMNN_NUMERIC_CAST_CHECK
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25
armnn::GetDataTypeSize
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:182
armnn::Softmax
void Softmax(Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo.
Definition: Softmax.cpp:17
armnn::MemBlockStrategyType::SingleAxisPacking
@ SingleAxisPacking
armnn::ReturnWithError
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:780
armnn::SpaceToDepth
void SpaceToDepth(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: SpaceToDepth.cpp:36
armnn::ReverseGetFlatIdx
unsigned int ReverseGetFlatIdx(const std::vector< unsigned int > &idxList, unsigned int inputRank, std::vector< unsigned int > &elementNumInner)
Definition: ReverseV2Impl.cpp:34
armnn::Convolution3dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:641
armnn::PermutationVector
Definition: Types.hpp:314
armnn::Status::Success
@ Success
armnn::SetNeonSliceData
auto SetNeonSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
Definition: NeonWorkloadUtils.hpp:161
armnn::Dimensionality::NotSpecified
@ NotSpecified
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::GraphEvent::LayerErased
@ LayerErased
armnnUtils::DataLayoutIndexed::GetWidthIndex
unsigned int GetWidthIndex() const
Definition: DataLayoutIndexed.hpp:25
armnn::SpaceToBatchNd
void SpaceToBatchNd(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
Definition: SpaceToBatchNd.cpp:48
armnn::ConvertActivationDescriptorToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
Definition: ArmComputeUtils.hpp:87
armnn::LayerType::BroadcastTo
@ BroadcastTo
armnn::RuntimeException
Definition: Exceptions.hpp:120
armnn::DetectionPostProcess
void DetectionPostProcess(const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &anchorsInfo, const TensorInfo &detectionBoxesInfo, const TensorInfo &detectionClassesInfo, const TensorInfo &detectionScoresInfo, const TensorInfo &numDetectionsInfo, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
Definition: DetectionPostProcess.cpp:141
armnn::Quantize
QuantizedType Quantize(float value, float scale, int32_t offset)
Quantize a floating point data type into an 8-bit data type.
Definition: TypesUtils.cpp:30
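A minimal sketch showing how Quantize and Dequantize (both listed in this reference) round-trip a value, assuming the usual affine scheme real = scale * (quantized - offset); the include path and the chosen values are illustrative:

    #include <armnn/TypesUtils.hpp>
    #include <cstdint>

    // round(1.5 / 0.1) + 128 = 143 (clamped to the uint8_t range).
    uint8_t q = armnn::Quantize<uint8_t>(1.5f, 0.1f, 128);

    // 0.1 * (143 - 128) = 1.5, recovering the original value up to rounding.
    float   r = armnn::Dequantize(q, 0.1f, 128);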
armnn::IsLayerSupported
bool IsLayerSupported(const armnn::Layer &layer)
Definition: MockBackend.cpp:83
armnn::ResizeMethod::NearestNeighbor
@ NearestNeighbor
armnn::GetLayerInOutDatatype
std::vector< DataType > GetLayerInOutDatatype(const Layer *layer)
Definition: Network.cpp:1020
armnn::Pooling2dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:407
armnn::ActivationFunction
ActivationFunction
Definition: Types.hpp:86
armnn::BoostLogSeverityMapping::info
@ info
armnn::BinaryOperation::Power
@ Power
armnn::Pooling3dDescriptor::m_PadFront
uint32_t m_PadFront
Padding front value in the depth dimension.
Definition: Descriptors.hpp:483
armnn::BackendCapability::AsyncExecution
@ AsyncExecution
Asynchronous Execution.
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::Tile
void Tile(const TileDescriptor &params, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
Definition: Tile.cpp:45
armnn::ConvertComparisonOperationToAcl
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor &descriptor)
Definition: ArmComputeUtils.hpp:141
armnn::PoolingAlgorithm::Average
@ Average
armnn::Decoder::Get
virtual IType Get() const =0
armnn::SetClSliceData
auto SetClSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
Definition: ClWorkloadUtils.hpp:100
armnn::DataType::Signed32
@ Signed32
armnn::UnaryOperation::Ceil
@ Ceil
ZeroVector
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Definition: LstmUtils.cpp:76
armnn::ShapeInferenceMethod::ValidateOnly
@ ValidateOnly
Validate all output shapes.
armnn::ShapeInferenceMethod::InferAndValidate
@ InferAndValidate
Infer missing output shapes and validate all output shapes.
armnn::ReduceOperation::Prod
@ Prod
armnn::ActivationFunction::Abs
@ Abs
armnn::BackendOptions::Var::AsString
std::string AsString() const
Definition: BackendOptions.hpp:123
armnn::DataType::QAsymmS8
@ QAsymmS8
armnn::CapabilityClass::PaddingRequired
@ PaddingRequired
armnn::Pooling3dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:477
armnn::ResizeMethod::Bilinear
@ Bilinear
ARMNN_FALLTHROUGH
#define ARMNN_FALLTHROUGH
Definition: Utils.hpp:36
armnn::ArgMinMaxFunction::Min
@ Min
armnn::Pad
void Pad(const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
Definition: Pad.cpp:39
armnn::LayerType::LastLayer
@ LastLayer
armnn::roundf
float roundf(float value)
Definition: Utils.hpp:43
armnn::Pooling2dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:419
armnn::JsonObjectType::ExecObjectDesc
@ ExecObjectDesc
armnn::ReshapeWeightsForAcl
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
Definition: WorkloadUtils.cpp:47
armnn::SpaceToDepthDescriptor::m_BlockSize
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
Definition: Descriptors.hpp:1092
armnn::CoordinatesToIndex
uint32_t CoordinatesToIndex(TensorShape &shape, std::vector< uint32_t > &coordinates)
Definition: Tile.cpp:32
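Together with IndexToCoordinates above, this converts between a flat element index and per-dimension coordinates. As a minimal worked example, assuming row-major ordering (last dimension varies fastest): for a shape of {2, 3}, coordinates {1, 1} map to index 1 * 3 + 1 = 4, and index 4 maps back to {1, 1}.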
armnn::TuningLevel::Rapid
@ Rapid
armnn::JsonObjectType::Event
@ Event
ARMNN_VERSION
#define ARMNN_VERSION
ARMNN_VERSION: "X.Y.Z" where: X = Major version number Y = Minor version number Z = Patch version num...
Definition: Version.hpp:22
armnn::UnaryOperation::Log
@ Log
armnn::Debug
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex, bool outputsToFile)
Definition: Debug.cpp:97
armnn::Pooling3dDescriptor::m_PoolHeight
uint32_t m_PoolHeight
Pooling height value.
Definition: Descriptors.hpp:489
armnn::LogicalBinaryOperation::LogicalAnd
@ LogicalAnd
armnn::optimizations::OptimizeInverseConversionsFp32
OptimizeForConnection< ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp32
Definition: OptimizeInverseConversions.hpp:44
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
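A minimal sketch of passing and reading a backend option, assuming the initializer-list constructor and the Var getters (IsBool/AsBool) listed elsewhere in this reference; the option name and include path are illustrative:

    #include <armnn/BackendOptions.hpp>

    // Hand a GPU-specific flag to the backend.
    armnn::BackendOptions gpuOptions("GpuAcc", {{"FastMathEnabled", true}});

    // Read the option back through the Var type/value getters.
    const auto& option = gpuOptions.GetOption(0);
    if (option.GetValue().IsBool())
    {
        bool fastMath = option.GetValue().AsBool();
        (void)fastMath;  // silence unused-variable warnings in this sketch
    }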
armnn::TuningLevel::Normal
@ Normal
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Resize
void Resize(Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
Definition: Resize.cpp:65
FORWARD_WORKLOAD_VALIDATE_FUNC
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported,...)
Definition: NeonLayerSupport.cpp:154
armnn::ComparisonOperation
ComparisonOperation
Definition: Types.hpp:109
armnn::ExtractJsonObjects
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event * >> descendantsMap)
Definition: Profiling.cpp:303
armnn::CalculateSlotOptionForOutput
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1563
armnn::Dimensionality::Specified
@ Specified
armnn::Status
Status
Definition: Types.hpp:42
armnn::Pooling3dDescriptor::m_PadBack
uint32_t m_PadBack
Padding back value in the depth dimension.
Definition: Descriptors.hpp:485
armnn::optimizations::TransposeAsReshape
OptimizeForType< TransposeLayer, TransposeAsReshapeImpl > TransposeAsReshape
Definition: TransposeAsReshape.hpp:77
armnn::ProfilingDetailsMethod::DetailsOnly
@ DetailsOnly
armnn::BackendOptions::Var::IsBool
bool IsBool() const
Type getters.
Definition: BackendOptions.hpp:112
armnn::Pooling3dDescriptor::m_StrideX
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Definition: Descriptors.hpp:493
armnnUtils::GetNumElementsBetween
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
Definition: TensorUtils.cpp:209
armnn::BoostLogSeverityMapping::fatal
@ fatal
armnn::Pooling3dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:475
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::CalculateSlotOptionForInput
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1478
armnn::Convolution3dDescriptor::m_PadBack
uint32_t m_PadBack
Padding back value in the depth dimension.
Definition: Descriptors.hpp:639
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::Offset
unsigned int Offset(const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
Definition: BatchToSpaceNd.cpp:15
armnn::Layer::ExecuteStrategy
void ExecuteStrategy(IStrategy &strategy) const override
Apply a visitor to this layer.
Definition: Layer.cpp:549
armnn::BoostLogSeverityMapping::debug
@ debug
armnn::optimizations::FuseBatchNormIntoConvolution2DFloat16
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoConvolution2DFloat16
Definition: FuseBatchNorm.hpp:227
armnn::StrEqual
constexpr bool StrEqual(const char *strA, const char(&strB)[N])
Definition: TypesUtils.hpp:201
armnn::BFloat16
Definition: BFloat16.hpp:15
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::ConvertPaddingModeToAcl
arm_compute::PaddingMode ConvertPaddingModeToAcl(const PaddingMode &paddingMode)
Definition: ArmComputeUtils.hpp:322
armnn::ViewsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:306
armnn::EdgeStrategy::ExportToTarget
@ ExportToTarget
Destination backend can work directly with tensors on source backend.
armnn::ConvertMaskToACLFormat
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
Definition: WorkloadUtils.cpp:298
armnn::AssignBackends
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:1295
LIST_OF_LAYER_TYPE
#define LIST_OF_LAYER_TYPE
This list uses the X macro technique.
Definition: Types.hpp:408
armnn::LogSeverity::Warning
@ Warning
armnn::BinaryOperation::Minimum
@ Minimum
armnn::LayerType::Map
@ Map
armnn::optimizations::FuseBatchNormIntoDepthwiseConvolution2DFloat16
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoDepthwiseConvolution2DFloat16
Definition: FuseBatchNorm.hpp:237
armnn::IsQuantizedType
constexpr bool IsQuantizedType()
Definition: TypesUtils.hpp:311
armnn::Convolution3dDescriptor::m_DilationY
uint32_t m_DilationY
Dilation along y axis.
Definition: Descriptors.hpp:649
armnn::BackendsMap
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:282
armnn::Compute::CpuAcc
@ CpuAcc
CPU Execution: NEON: ArmCompute.
VectorVectorCwiseProductAccumulate
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:204
armnn::ProfilingDetailsMethod::DetailsWithEvents
@ DetailsWithEvents
armnn::ActivationFunction::ReLu
@ ReLu
armnn::LayerType::MemCopy
@ MemCopy
armnn::optimizations::SquashEqualTransposeSiblings
OptimizeForConnection< Layer, TransposeLayer, SquashEqualSiblingsImpl< TransposeLayer > > SquashEqualTransposeSiblings
Definition: SquashEqualSiblings.hpp:69
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn::IsLayerSupported
bool IsLayerSupported(const armnn::Layer *layer)
Definition: MockBackend.cpp:62
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnnUtils::DataLayoutIndexed::GetChannelsIndex
unsigned int GetChannelsIndex() const
Definition: DataLayoutIndexed.hpp:23
armnn::optimizations::ConvertConstantsHalfToFloat
ConvertConstants< Float16ToFloat32, IsFloat32Layer > ConvertConstantsHalfToFloat
Definition: ConvertConstants.hpp:98
armnn::Convolution3dDescriptor::m_StrideZ
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
Definition: Descriptors.hpp:645
armnn::DepthToSpace
void DepthToSpace(const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: DepthToSpace.cpp:18
armnn::TopKSort
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
Definition: DetectionPostProcess.cpp:25
armnn::ViewsDescriptor::GetNumViews
uint32_t GetNumViews() const
Get the number of views.
Definition: Descriptors.cpp:301
armnn::StridedSlice
void StridedSlice(const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
Definition: StridedSlice.cpp:90
armnn::optimizations::SquashEqualPermuteSiblings
OptimizeForConnection< Layer, PermuteLayer, SquashEqualSiblingsImpl< PermuteLayer > > SquashEqualPermuteSiblings
Definition: SquashEqualSiblings.hpp:67
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::ComputeReductionTensorShape
const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
Function to compute the output tensor shape based on the axes and whether keepDims is set.
Definition: ArmComputeUtils.hpp:347
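As a minimal illustration of the usual reduction semantics: an input of shape {2, 3, 4} reduced over axis 1 yields {2, 4} when keepDims is false, and {2, 1, 4} when keepDims is true.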
armnn::ActivationFunction::Square
@ Square
SetActivationParameters
void SetActivationParameters(uint32_t activation, armnn::ActivationFunction &outArmnnActivation, float &outA, float &outB)
Definition: LstmUtils.cpp:258
armnn::LayerType::Input
@ Input
armnn::NonMaxSuppression
std::vector< unsigned int > NonMaxSuppression(unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
Definition: DetectionPostProcess.cpp:50
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::optimizations::FoldPadIntoDepthwiseConvolution2d
OptimizeForExclusiveConnection< PadLayer, DepthwiseConvolution2dLayer, pad_fold::FoldPadIntoDepthwiseConvolution2dImpl > FoldPadIntoDepthwiseConvolution2d
Definition: FoldPadIntoLayer2d.hpp:281
armnn::TuningLevel
TuningLevel
Definition: ArmComputeTuningUtils.hpp:18
armnn::Activation
float Activation(float in, ActivationFunction function, float a, float b)
Definition: Activation.cpp:13
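A minimal sketch of the reference helper above; a and b are the function-specific parameters (ignored by plain ReLu), and the header, which lives with the reference workloads, is omitted here:

    // ReLu clamps negatives to zero.
    float relu = armnn::Activation(-1.0f, armnn::ActivationFunction::ReLu, 0.0f, 0.0f);          // 0.0f

    // BoundedReLu clamps to [b, a], here [0, 6].
    float bounded = armnn::Activation(8.0f, armnn::ActivationFunction::BoundedReLu, 6.0f, 0.0f); // 6.0f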
armnn::FuseLayer
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
Definition: ArmComputeSubgraphUtils.hpp:54
armnn::NeonElementwiseBinaryWorkloadValidate
arm_compute::Status NeonElementwiseBinaryWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ElementwiseBinaryDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: NeonElementwiseBinaryWorkload.cpp:20
armnn::CapabilityClass::CapabilityClassMax
@ CapabilityClassMax
armnnUtils::Filesystem::CreateDirectory
std::string CreateDirectory(std::string sPath)
Returns full path to temporary folder.
Definition: Filesystem.cpp:47
armnn::SetValueChecked
void SetValueChecked(Optional< T & > optionalRef, V &&val)
Definition: LayerSupportCommon.hpp:17
armnnUtils::GetTensorInfo
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:40
armnn::optimizations::SquashEqualReshapeSiblings
OptimizeForConnection< Layer, ReshapeLayer, SquashEqualSiblingsImpl< ReshapeLayer > > SquashEqualReshapeSiblings
Definition: SquashEqualSiblings.hpp:70
armnn::LogSeverity
LogSeverity
Definition: Utils.hpp:13
armnn::BinaryOperation::Div
@ Div
armnn::OutputShapeRounding::Ceiling
@ Ceiling
armnn::DataType::Signed64
@ Signed64
armnn::LayerType::Convolution2d
@ Convolution2d
armnn::CalculateEdgeStrategy
EdgeStrategy CalculateEdgeStrategy(BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1723
armnn::LayerType::FirstLayer
@ FirstLayer
armnn::GetComputeDeviceAsCString
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34
armnn::Optimize
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:2132
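A minimal end-to-end sketch of the call above; the umbrella header and the backend preference order are illustrative assumptions:

    #include <armnn/ArmNN.hpp>

    // Build (or parse) a network first.
    armnn::INetworkPtr network = armnn::INetwork::Create();
    // ... add layers here ...

    // The runtime supplies the IDeviceSpec describing the available backends.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    // Prefer the NEON backend, falling back to the reference backend.
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
        *network,
        { armnn::Compute::CpuAcc, armnn::Compute::CpuRef },
        runtime->GetDeviceSpec());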
armnn::BatchToSpaceNdDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:902
armnn::ComparisonOperation::Equal
@ Equal
armnn::optimizations::OptimizeInverseTransposes
OptimizeForConnection< TransposeLayer, TransposeLayer, OptimizeInversePermutesImpl< TransposeLayer > > OptimizeInverseTransposes
Definition: OptimizeInversePermutes.hpp:45
armnnUtils::DataLayoutIndexed::GetIndex
unsigned int GetIndex(const armnn::TensorShape &shape, unsigned int batchIndex, unsigned int channelIndex, unsigned int heightIndex, unsigned int widthIndex) const
Definition: DataLayoutIndexed.hpp:28
armnn::DepthwiseConvolution2dDescriptor::m_DilationX
uint32_t m_DilationX
Dilation factor value for width dimension.
Definition: Descriptors.hpp:704
armnn::NormalizationAlgorithmMethod::LocalContrast
@ LocalContrast
Jarret 2009: Local Contrast Normalization.
armnn::optimizations::AddBroadcastReshapeLayer
OptimizeForType< Layer, AddBroadcastReshapeLayerImpl > AddBroadcastReshapeLayer
Definition: AddBroadcastReshapeLayer.hpp:94
armnn::NullPointerException
Definition: Exceptions.hpp:146
armnn::Pooling3dDescriptor::m_PoolDepth
uint32_t m_PoolDepth
Pooling depth value.
Definition: Descriptors.hpp:491
armnn::swap
void swap(ViewsDescriptor &first, ViewsDescriptor &second)
Definition: Descriptors.cpp:365
armnnUtils::GetTensorShape
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:21
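As a minimal illustration of the expected ordering (an assumption about how the arguments are laid out per the requested data layout): GetTensorShape(1, 3, 224, 224, DataLayout::NCHW) would give {1, 3, 224, 224}, while the same call with DataLayout::NHWC would give {1, 224, 224, 3}.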
armnn::TensorShape::GetDimensionality
Dimensionality GetDimensionality() const
Function that returns the tensor type.
Definition: Tensor.hpp:92
armnn::optimizations::FuseBatchNormIntoDepthwiseConvolution2DFloat32
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoDepthwiseConvolution2DFloat32
Definition: FuseBatchNorm.hpp:232
armnn::PaddingMode::Reflect
@ Reflect
armnn::CopyArmComputeTensorData
void CopyArmComputeTensorData(arm_compute::Tensor &dstTensor, const T *srcData)
Definition: NeonWorkloadUtils.hpp:62
armnn::LayerType
LayerType
When adding a new layer, also adapt the LastLayer value in the LayerType enum class below.
Definition: Types.hpp:491
armnn::optimizations::OptimizeConsecutiveReshapes
OptimizeForConnection< ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl > OptimizeConsecutiveReshapes
Definition: OptimizeConsecutiveReshapes.hpp:61
armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
Definition: ArmComputeUtils.hpp:194
armnn::TensorShape::GetNumElements
unsigned int GetNumElements() const
Function that calculates the number of tensor elements by multiplying all dimension sizes which are Specified.
Definition: Tensor.cpp:181
armnn::CheckScaleSetOnQuantizedType
bool CheckScaleSetOnQuantizedType(Layer *layer, Optional< std::vector< std::string > & > errMessages)
Definition: Network.cpp:795
armnn::OptionalReferenceSwitch< std::is_reference< T >::value, T >::value
const T & value() const
Definition: Optional.hpp:146
armnn::ConvertActivationFunctionToAclActivationFunction
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction)
Definition: ArmComputeUtils.hpp:62
armnn::PaddingMode::Constant
@ Constant
armnn::ComputeAclAxis
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank,...
Definition: ArmComputeUtils.hpp:273
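As a worked example, assuming the conversion simply mirrors the axis order (aclAxis = rank - 1 - armnnAxis): for a rank-4 tensor, ArmNN axis 1 would map to ACL axis 2.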
armnn::Pooling2dDescriptor::m_PoolType
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
Definition: Descriptors.hpp:405
armnn::Status::Failure
@ Failure
armnn::WrapClError
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
Definition: ClWorkloadUtils.hpp:160
armnn::Gather
void Gather(const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis_int)
Definition: Gather.cpp:15
armnn::OptionalBase::has_value
bool has_value() const noexcept
Definition: Optional.hpp:53
armnn::LayerType::Output
@ Output
armnn::LayerType::Constant
@ Constant
armnn::GetCapability
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
Definition: BackendHelper.cpp:37
armnn::DataLayout::NCHW
@ NCHW
armnn::IOptimizedNetwork::pOptimizedNetworkImpl
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:946
armnn::AssignBackendsIConnectable
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:1076
armnn::ActivationFunction::Sigmoid
@ Sigmoid
armnn::HasMatchingCapability
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities str...
Definition: BackendHelper.cpp:85
VectorBatchVectorAssign
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
Definition: LstmUtils.cpp:113
armnn::SpaceToDepthDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:1095
armnn::ComparisonOperation::Greater
@ Greater
armnn::NextIndex
bool NextIndex(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
Definition: Reduce.cpp:19