29 #include <arm_compute/core/CL/CLKernelLibrary.h>
30 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
31 #include <arm_compute/runtime/CL/CLScheduler.h>
48 std::string& outReasonIfUnsupported)
55 std::string& outReasonIfUnsupported,
// Fragment: saving the compiled CL context to a cache. The enclosing function
// header and several intermediate lines are not part of this extract.
// This path is entered only when the model options request saving the network.
71 if (modelOptions->SaveCachedNetwork())
75 auto cachedFd = modelOptions->GetCachedFileDescriptor();
// File-descriptor route: serialise into an in-memory stream, copy the bytes
// into compiledContextData, then hand that buffer to write().
78 std::vector<uint8_t> compiledContextData;
79 std::stringstream stream;
80 bool serialized =
serializer.SaveSerializedToStream(stream);
// NOTE(review): stream.str() is copied into serializedString and then copied
// again into compiledContextData, so the payload is held in memory twice here.
83 std::string
const serializedString{stream.str()};
84 std::copy(serializedString.begin(),
85 serializedString.end(),
86 std::back_inserter(compiledContextData));
// NOTE(review): write() may return -1 or a short count; how `success` is
// checked is not visible in this extract - confirm partial writes are handled.
87 auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size());
90 ARMNN_LOG(
info) <<
"ClWorkloadFactory:: Could not cache the compiled context!";
// File-path route: the path is used only if it is non-empty and already names
// an existing regular file; the file is then opened for binary output.
96 auto filePath = modelOptions->GetCachedNetworkFilePath();
97 if (filePath !=
"" && fs::exists(filePath) && fs::is_regular_file(filePath))
100 std::ofstream file(filePath, std::ios::out | std::ios::binary);
// MakeWorkload (FloatWorkload/Uint8Workload form): delegates to
// MakeWorkloadHelper<FloatWorkload, Uint8Workload>, passing the descriptor,
// the workload info and any extra arguments (perfectly forwarded).
// The template header and the `info`/`args` parameters are on lines missing
// from this extract.
108 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
114 return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
// OpenCL errors raised while building the workload are caught here; the
// handler body is not visible (presumably rethrown via WrapClError - confirm).
116 catch (
const cl::Error& clError)
// MakeWorkload (single-Workload form): constructs `Workload` directly with
// std::make_unique, passing the descriptor, the workload info and any extra
// arguments (perfectly forwarded), and returns it as std::unique_ptr<IWorkload>.
122 template <
typename Workload,
typename QueueDescriptorType,
typename... Args>
123 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
124 const WorkloadInfo& info,
129 return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
// OpenCL errors raised by the workload constructor are caught here; the
// handler body is not visible (presumably rethrown via WrapClError - confirm).
131 catch (
const cl::Error& clError)
// Creates m_CLCompileContext from the CLKernelLibrary's current context and
// device and, when a model context is present and saving is NOT requested,
// populates it from a previously cached network (file descriptor or file path).
// Several original lines (braces and some conditions) are missing from this
// extract, so the exact nesting of the branches below is not visible.
137 void ClWorkloadFactory::InitializeCLCompileContext()
140 auto context = arm_compute::CLKernelLibrary::get().context();
141 auto device = arm_compute::CLKernelLibrary::get().get_device();
142 m_CLCompileContext = arm_compute::CLCompileContext(context, device);
144 if (m_ModelContextPtr)
// NOTE(review): the dynamic_cast result is dereferenced on the very next
// original line (147 -> 148) with no null check; a model context of another
// concrete type would yield nullptr here - confirm this cannot happen.
147 auto modelOptions =
dynamic_cast<ClBackendModelContext*
>(m_ModelContextPtr.get());
148 auto filePath = modelOptions->GetCachedNetworkFilePath();
// Loading is attempted only when the options do not ask to save the network.
149 if (!(modelOptions->SaveCachedNetwork()))
151 ClContextDeserializer deserializer;
152 auto cachedFd = modelOptions->GetCachedFileDescriptor();
// File-descriptor route: fstat() supplies the size of the cached blob.
155 struct stat statBuffer;
156 if (fstat(cachedFd, &statBuffer) == 0)
// NOTE(review): st_size is narrowed to long here and to unsigned int below;
// whether the value is range-checked in between is not visible - confirm.
158 long dataSize =
static_cast<long>(statBuffer.st_size);
// Reads the descriptor's current offset; how `offset` is used afterwards is
// on lines missing from this extract.
161 auto offset = lseek(cachedFd, 0, SEEK_CUR);
164 std::vector <uint8_t> compiledContextData(
static_cast<unsigned int>(dataSize));
// pread() reads from absolute offset 0 and does not move the file position.
// NOTE(review): pread() may return -1 or fewer bytes than requested; how
// `success` is checked is not visible - confirm short reads are handled.
165 auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0);
168 deserializer.DeserializeFromBinary(m_CLCompileContext,
171 compiledContextData);
// File-path route: used only if the path is non-empty and names an existing
// regular file.
179 if (filePath !=
"" && fs::exists(filePath) && fs::is_regular_file(filePath))
182 deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
// Constructor fragments (the signatures are on lines missing from this
// extract). Both store the memory manager and then call
// InitializeCLCompileContext(). This first one stores an empty
// (default-constructed) model-context pointer.
189 : m_MemoryManager(memoryManager), m_ModelContextPtr(
IBackendInternal::IBackendSpecificModelContextPtr{})
191 InitializeCLCompileContext();
// This second one stores the caller-supplied model-context pointer.
196 : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
198 InitializeCLCompileContext();
// CreateTensorHandle fragments (two overloads; the start of each signature is
// on lines missing from this extract). Each visible body creates a
// ClTensorHandle and attaches the memory manager's inter-layer memory group.
// NOTE(review): m_MemoryManager is dereferenced with no null check visible,
// and no use of IsMemoryManaged is visible - confirm both against the full file.
202 const bool IsMemoryManaged)
const
205 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
206 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
// Overload that additionally passes a data layout to the ClTensorHandle.
213 const bool IsMemoryManaged)
const
216 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
217 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
// CreateSubTensorHandle fragment (the start of the signature and the loop
// header are on lines missing from this extract).
224 unsigned int const* subTensorOrigin)
const
// Convert the requested sub-tensor shape to an arm_compute::TensorShape.
227 arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
// Coordinate i is taken from subTensorOrigin[revertedIndex]; how revertedIndex
// is derived from i is not visible here.
234 coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
// Check the (coords, shape) pair against the parent's shape before building
// the handle; the body of the failing branch is not visible in this extract.
237 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.
GetShape());
238 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
// The result wraps the parent, downcast to IClTensorHandle*, with the
// converted shape and coordinates.
243 return std::make_unique<ClSubTensorHandle>(
244 PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
255 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
256 return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor,
info, m_CLCompileContext);
260 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
261 return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor,
info, m_CLCompileContext);
265 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
266 return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor,
info, m_CLCompileContext);
270 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
271 return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor,
info, m_CLCompileContext);
275 auto batchNormalizationQueueDescriptor
276 = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
277 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
278 (*batchNormalizationQueueDescriptor,
info, m_CLCompileContext);
282 auto batchToSpaceNdQueueDescriptor
283 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
284 return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor,
info, m_CLCompileContext);
288 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
289 return MakeWorkload<ClCastWorkload>(*castQueueDescriptor,
info, m_CLCompileContext);
293 auto channelShuffleQueueDescriptor
294 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
295 return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor,
info, m_CLCompileContext);
299 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
300 return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor,
info, m_CLCompileContext);
304 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
305 return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor,
info, m_CLCompileContext);
309 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
310 return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor,
info, m_CLCompileContext);
314 auto convertFp16ToFp32QueueDescriptor
315 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
316 return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
322 auto convertFp32ToFp16QueueDescriptor
323 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
324 return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
// Convolution2d fragment: downcasts the generic descriptor, defaults fast-math
// to off, then builds a ClConvolution2dWorkload that is given the memory
// manager's intra-layer manager. Lines between the embedded numbers (including
// where isFastMathEnabled is updated) are missing from this extract.
330 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
331 bool isFastMathEnabled =
false;
// NOTE(review): the inner `.get() != nullptr` test checks the same pointer as
// the outer `if (m_ModelContextPtr)`; it looks redundant - confirm.
332 if (m_ModelContextPtr)
334 if (m_ModelContextPtr.get() !=
nullptr)
343 return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
345 m_MemoryManager->GetIntraLayerManager(),
// Convolution3d fragment: same visible shape as the Convolution2d fragment
// above, building a ClConvolution3dWorkload instead.
351 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
352 bool isFastMathEnabled =
false;
// NOTE(review): same repeated null test on m_ModelContextPtr as above - confirm.
353 if (m_ModelContextPtr)
355 if (m_ModelContextPtr.get() !=
nullptr)
364 return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
366 m_MemoryManager->GetIntraLayerManager(),
372 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
373 return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor,
info, m_CLCompileContext);
377 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
378 return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor,
info, m_CLCompileContext);
382 auto depthwiseConvolution2dQueueDescriptor
383 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
384 return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
390 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
391 return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor,
info, m_CLCompileContext);
395 auto detectionPostProcessQueueDescriptor
396 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
397 return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
403 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
404 return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor,
info, m_CLCompileContext);
408 auto elementwiseBinaryQueueDescriptor
409 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
410 switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
418 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
419 return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor,
info, m_CLCompileContext);
427 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
428 return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor,
info, m_CLCompileContext);
436 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
437 return std::make_unique<ClFloorDivWorkload>(divisionQueueDescriptor,
info, m_CLCompileContext);
445 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
446 return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor,
info, m_CLCompileContext);
454 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
455 return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor,
info, m_CLCompileContext);
463 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
464 return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
471 return std::make_unique<ClElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor,
481 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
482 return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
492 auto elementwiseUnaryQueueDescriptor
493 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
494 switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
499 absQueueDescriptor.
m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
500 absQueueDescriptor.
m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
501 return std::make_unique<ClAbsWorkload>(absQueueDescriptor,
info, m_CLCompileContext);
504 return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
506 return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
508 return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
512 return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
516 rsqrtQueueDescriptor.
m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
517 rsqrtQueueDescriptor.
m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
518 return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor,
info, m_CLCompileContext);
521 return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
523 return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
530 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
531 return std::make_unique<ClFillWorkload>(*fillQueueDescriptor,
info, m_CLCompileContext);
535 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
536 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor,
info, m_CLCompileContext);
540 auto fullyConnectedQueueDescriptor
541 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
542 return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
544 m_MemoryManager->GetIntraLayerManager(),
549 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
550 return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor,
info, m_CLCompileContext);
554 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
555 return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor,
info, m_CLCompileContext);
559 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
560 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor,
info);
564 auto instanceNormalizationQueueDescriptor
565 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
566 return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
572 auto l2NormalizationQueueDescriptor
573 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
574 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
580 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
581 switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
584 return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
588 return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
597 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
598 return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
600 m_MemoryManager->GetIntraLayerManager(),
605 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
606 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor,
info, m_CLCompileContext);
610 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
611 return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor,
info, m_CLCompileContext);
615 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
616 return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor,
info, m_CLCompileContext);
620 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
621 if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
625 return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor,
info);
629 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
630 if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
634 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor,
info);
638 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
639 return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor,
info, m_CLCompileContext);
643 auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
644 return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor,
info, m_CLCompileContext);
648 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
649 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
655 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
656 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor,
info);
660 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
661 return MakeWorkload<ClPadWorkload>(*padQueueDescriptor,
info, m_CLCompileContext);
665 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
666 return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor,
info, m_CLCompileContext);
670 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
671 return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor,
info, m_CLCompileContext);
675 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
676 return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor,
info, m_CLCompileContext);
680 auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
681 return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor,
info, m_CLCompileContext);
685 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
686 return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor,
info, m_CLCompileContext);
690 auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
691 return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor,
info, m_CLCompileContext);
695 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
696 return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor,
info, m_CLCompileContext);
700 auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
701 return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor,
info, m_CLCompileContext);
705 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
706 return std::make_unique<ClRankWorkload>(*rankQueueDescriptor,
info);
710 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
711 return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor,
info);
715 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
716 return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor,
info, m_CLCompileContext);
720 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
721 return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor,
info, m_CLCompileContext);
725 auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
726 return MakeWorkload<ClReverseV2Workload>(*reverseV2QueueDescriptor,
info, m_CLCompileContext);
730 auto scatterNdQueueDescriptor = PolymorphicDowncast<const ScatterNdQueueDescriptor*>(&descriptor);
731 return MakeWorkload<ClScatterNdWorkload>(*scatterNdQueueDescriptor,
info, m_CLCompileContext);
735 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
736 return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor,
info, m_CLCompileContext);
740 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
741 return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
743 m_MemoryManager->GetIntraLayerManager(),
748 auto spaceToBatchNdQueueDescriptor
749 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
750 return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor,
info, m_CLCompileContext);
754 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
755 return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor,
info, m_CLCompileContext);
759 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
760 return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor,
info, m_CLCompileContext);
764 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
765 return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor,
info, m_CLCompileContext);
769 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
770 return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor,
info, m_CLCompileContext);
774 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
775 return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor,
info, m_CLCompileContext);
779 auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
780 return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor,
info, m_CLCompileContext);
784 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
785 return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor,
info, m_CLCompileContext);
789 auto transposeConvolution2dQueueDescriptor
790 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
791 return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
793 m_MemoryManager->GetIntraLayerManager(),
798 auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
799 return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,
#define ARMNN_LOG(severity)
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
bool IsFastMathEnabled() const
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override
std::unique_ptr< IWorkload > CreateWorkload(LayerType type, const QueueDescriptor &descriptor, const WorkloadInfo &info) const override
Backends should implement their own CreateWorkload function with a switch statement.
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
const BackendId & GetBackendId() const override
ClWorkloadFactory(const std::shared_ptr< ClMemoryManager > &memoryManager)
void AfterWorkloadsCreated() override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Copyright (c) 2021 ARM Limited and Contributors.
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
void IgnoreUnused(Ts &&...)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
@ UnidirectionalSequenceLstm
std::vector< BackendOptions > ModelOptions
TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 > FloatWorkload
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 > Uint8Workload
constexpr const char * ClBackendId()
void * m_AdditionalInfoObject
std::vector< ITensorHandle * > m_Inputs
std::vector< ITensorHandle * > m_Outputs
Contains information about TensorInfos of a layer.