36 std::string& outReasonIfUnsupported)
43 std::string& outReasonIfUnsupported,
54 void NeonWorkloadFactory::SetNumberOfThreads()
56 if (m_ModelContextPtr)
58 const unsigned int MIN_THREADS = 1;
59 const unsigned int MAX_THREADS = 64;
66 if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
68 arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
74 : m_MemoryManager(memoryManager), m_ModelContextPtr(
IBackendInternal::IBackendSpecificModelContextPtr{})
81 : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
88 unsigned int const* subTensorOrigin)
const 90 const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
98 coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
101 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.
GetShape());
102 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
107 return std::make_unique<NeonSubTensorHandle>(
108 PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
112 const bool IsMemoryManaged)
const 114 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
117 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
124 const bool IsMemoryManaged)
const 126 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
129 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
148 return std::make_unique<NeonActivationWorkload>(descriptor,
info);
154 return std::make_unique<NeonAdditionWorkload>(descriptor,
info);
160 return std::make_unique<NeonArgMinMaxWorkload>(descriptor,
info);
166 return std::make_unique<NeonBatchNormalizationWorkload>(descriptor,
info);
172 return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor,
info);
178 return std::make_unique<NeonCastWorkload>(descriptor,
info);
184 return std::make_unique<NeonComparisonWorkload>(descriptor,
info);
190 return std::make_unique<NeonConcatWorkload>(descriptor,
info);
196 return std::make_unique<NeonConstantWorkload>(descriptor,
info);
203 return std::make_unique<NeonConvertBf16ToFp32Workload>(descriptor,
info);
210 return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor,
info);
217 return std::make_unique<NeonConvertFp32ToBf16Workload>(descriptor,
info);
224 return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor,
info);
230 bool isFastMathEnabled =
false;
231 if (m_ModelContextPtr)
233 if (m_ModelContextPtr.get() !=
nullptr)
242 return std::make_unique<NeonConvolution2dWorkload>(descriptor,
244 m_MemoryManager->GetIntraLayerManager(),
251 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor,
info);
257 return std::make_unique<NeonDepthToSpaceWorkload>(descriptor,
info);
263 return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor,
info);
269 return std::make_unique<NeonDequantizeWorkload>(descriptor,
info);
275 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor,
info);
281 return std::make_unique<NeonDivisionWorkload>(descriptor,
info);
295 return std::make_unique<NeonAbsWorkload>(absQueueDescriptor,
info);
303 return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor,
info);
306 return std::make_unique<NeonNegWorkload>(descriptor,
info);
308 return std::make_unique<NeonExpWorkload>(descriptor,
info);
310 return std::make_unique<NeonLogicalNotWorkload>(descriptor,
info);
330 return std::make_unique<NeonFillWorkload>(descriptor,
info);
336 return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor,
info);
342 return std::make_unique<NeonFullyConnectedWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
348 return std::make_unique<NeonGatherWorkload>(descriptor,
info);
365 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
372 return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor,
info);
378 return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor,
info,
379 m_MemoryManager->GetIntraLayerManager());
385 return std::make_unique<NeonLogSoftmaxWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
394 return std::make_unique<NeonLogicalAndWorkload>(descriptor,
info);
396 return std::make_unique<NeonLogicalOrWorkload>(descriptor,
info);
405 return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor,
info);
411 return std::make_unique<NeonMaximumWorkload>(descriptor,
info);
417 return std::make_unique<NeonMeanWorkload>(descriptor,
info);
428 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor,
info);
439 return std::make_unique<ImportMemGenericWorkload>(descriptor,
info);
451 return std::make_unique<NeonMinimumWorkload>(descriptor,
info);
457 return std::make_unique<NeonMultiplicationWorkload>(descriptor,
info);
463 return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor,
info,
464 m_MemoryManager->GetIntraLayerManager());
470 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
476 return std::make_unique<NeonPadWorkload>(descriptor,
info);
482 return std::make_unique<NeonPermuteWorkload>(descriptor,
info);
488 return std::make_unique<NeonPooling2dWorkload>(descriptor,
info);
494 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor,
info);
500 return std::make_unique<NeonPreluWorkload>(descriptor,
info);
506 return std::make_unique<NeonQLstmWorkload>(descriptor,
info);
512 return std::make_unique<NeonQuantizeWorkload>(descriptor,
info);
518 return std::make_unique<NeonQuantizedLstmWorkload>(descriptor,
info);
524 return std::make_unique<NeonRankWorkload>(descriptor,
info);
530 return std::make_unique<NeonReduceWorkload>(descriptor,
info);
536 return std::make_unique<NeonReshapeWorkload>(descriptor,
info);
542 return std::make_unique<NeonResizeWorkload>(descriptor,
info);
574 return std::make_unique<NeonSliceWorkload>(descriptor,
info);
580 return std::make_unique<NeonSoftmaxWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
586 return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor,
info);
592 return std::make_unique<NeonSpaceToDepthWorkload>(descriptor,
info);
598 return std::make_unique<NeonSplitterWorkload>(descriptor,
info);
604 return std::make_unique<NeonStackWorkload>(descriptor,
info);
610 return std::make_unique<NeonStridedSliceWorkload>(descriptor,
info);
616 return std::make_unique<NeonSubtractionWorkload>(descriptor,
info);
622 return std::make_unique<NeonTransposeWorkload>(descriptor,
info);
629 return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor,
info,
630 m_MemoryManager->GetIntraLayerManager());
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateCast(const CastQueueDescriptor &descriptor, const WorkloadInfo &info) const override
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ComparisonDescriptor for the ComparisonLayer.
uint32_t m_TargetWidth
Target width value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
constexpr const char * NeonBackendId()
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
bool IsFastMathEnabled() const
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumberOfThreads() const
LayerDescriptor m_Parameters
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
NeonWorkloadFactory(const std::shared_ptr< NeonMemoryManager > &memoryManager)
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
const BackendId & GetBackendId() const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFill(const FillQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetHeight
Target height value.
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLogicalBinary(const LogicalBinaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
std::unique_ptr< IWorkload > CreateReduce(const ReduceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQLstm(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
Contains information about inputs and outputs to a layer.
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateConvertBf16ToFp32(const ConvertBf16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override