Each backend should implement its own CreateWorkload function as a switch statement over the LayerType.
Each case of the switch corresponds to one LayerType and calls that backend's workload creation code for that layer type.
255 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
256 return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor, info, m_CLCompileContext);
260 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
261 return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor, info, m_CLCompileContext);
265 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
266 return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext);
270 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
271 return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext);
275 auto batchNormalizationQueueDescriptor
276 = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
277 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
278 (*batchNormalizationQueueDescriptor, info, m_CLCompileContext);
282 auto batchToSpaceNdQueueDescriptor
283 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
284 return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info, m_CLCompileContext);
288 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
289 return MakeWorkload<ClCastWorkload>(*castQueueDescriptor, info, m_CLCompileContext);
293 auto channelShuffleQueueDescriptor
294 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
295 return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info, m_CLCompileContext);
299 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
300 return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor, info, m_CLCompileContext);
304 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
305 return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor, info, m_CLCompileContext);
309 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
310 return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor, info, m_CLCompileContext);
314 auto convertFp16ToFp32QueueDescriptor
315 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
316 return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
322 auto convertFp32ToFp16QueueDescriptor
323 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
324 return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
330 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
331 bool isFastMathEnabled = false;
332 if (m_ModelContextPtr)
334 if (m_ModelContextPtr.get() != nullptr)
336 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
339 isFastMathEnabled = modelOptions->IsFastMathEnabled();
343 return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
345 m_MemoryManager->GetIntraLayerManager(),
351 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
352 bool isFastMathEnabled = false;
353 if (m_ModelContextPtr)
355 if (m_ModelContextPtr.get() != nullptr)
357 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
360 isFastMathEnabled = modelOptions->IsFastMathEnabled();
364 return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
366 m_MemoryManager->GetIntraLayerManager(),
372 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
373 return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor, info, m_CLCompileContext);
377 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
378 return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info, m_CLCompileContext);
382 auto depthwiseConvolution2dQueueDescriptor
383 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
384 return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
390 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
391 return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor, info, m_CLCompileContext);
395 auto detectionPostProcessQueueDescriptor
396 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
397 return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
403 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
404 return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
408 auto elementwiseBinaryQueueDescriptor
409 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
410 switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
414 AdditionQueueDescriptor additionQueueDescriptor;
415 additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
416 additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
417 additionQueueDescriptor.m_AdditionalInfoObject =
418 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
419 return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
423 DivisionQueueDescriptor divisionQueueDescriptor;
424 divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
425 divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
426 divisionQueueDescriptor.m_AdditionalInfoObject =
427 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
428 return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
432 MaximumQueueDescriptor maximumQueueDescriptor;
433 maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
434 maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
435 maximumQueueDescriptor.m_AdditionalInfoObject =
436 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
437 return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
441 MinimumQueueDescriptor minimumQueueDescriptor;
442 minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
443 minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
444 minimumQueueDescriptor.m_AdditionalInfoObject =
445 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
446 return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
450 MultiplicationQueueDescriptor multiplicationQueueDescriptor;
451 multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
452 multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
453 multiplicationQueueDescriptor.m_AdditionalInfoObject =
454 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
455 return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
462 return std::make_unique<ClElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor,
468 SubtractionQueueDescriptor subtractionQueueDescriptor;
469 subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
470 subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
471 subtractionQueueDescriptor.m_AdditionalInfoObject =
472 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
473 return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
483 auto elementwiseUnaryQueueDescriptor
484 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
485 switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
489 AbsQueueDescriptor absQueueDescriptor;
490 absQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
491 absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
492 return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
495 return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
497 return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
499 return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
503 return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
506 RsqrtQueueDescriptor rsqrtQueueDescriptor;
507 rsqrtQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
508 rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
509 return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
512 return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
514 return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
521 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
522 return std::make_unique<ClFillWorkload>(*fillQueueDescriptor, info, m_CLCompileContext);
526 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
527 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info, m_CLCompileContext);
531 auto fullyConnectedQueueDescriptor
532 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
533 return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
535 m_MemoryManager->GetIntraLayerManager(),
540 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
541 return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor, info, m_CLCompileContext);
545 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
546 return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor, info, m_CLCompileContext);
550 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
551 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
555 auto instanceNormalizationQueueDescriptor
556 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
557 return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
563 auto l2NormalizationQueueDescriptor
564 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
565 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
571 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
572 switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
575 return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
579 return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
588 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
589 return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
591 m_MemoryManager->GetIntraLayerManager(),
596 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
597 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info, m_CLCompileContext);
601 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
602 return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor, info, m_CLCompileContext);
606 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
607 return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor, info, m_CLCompileContext);
611 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
612 if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
614 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
616 return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
620 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
621 if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
623 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
625 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
629 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
630 return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor, info, m_CLCompileContext);
634 auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
635 return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor, info, m_CLCompileContext);
639 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
640 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
646 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
647 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
651 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
652 return MakeWorkload<ClPadWorkload>(*padQueueDescriptor, info, m_CLCompileContext);
656 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
657 return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor, info, m_CLCompileContext);
661 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
662 return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor, info, m_CLCompileContext);
666 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
667 return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor, info, m_CLCompileContext);
671 auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
672 return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info, m_CLCompileContext);
676 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
677 return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor, info, m_CLCompileContext);
681 auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
682 return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor, info, m_CLCompileContext);
686 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
687 return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor, info, m_CLCompileContext);
691 auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
692 return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info, m_CLCompileContext);
696 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
697 return std::make_unique<ClRankWorkload>(*rankQueueDescriptor, info);
701 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
702 return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor, info);
706 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
707 return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor, info, m_CLCompileContext);
711 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
712 return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor, info, m_CLCompileContext);
716 auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
717 return MakeWorkload<ClReverseV2Workload>(*reverseV2QueueDescriptor, info, m_CLCompileContext);
721 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
722 return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor, info, m_CLCompileContext);
726 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
727 return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
729 m_MemoryManager->GetIntraLayerManager(),
734 auto spaceToBatchNdQueueDescriptor
735 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
736 return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info, m_CLCompileContext);
740 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
741 return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info, m_CLCompileContext);
745 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
746 return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor, info, m_CLCompileContext);
750 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
751 return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor, info, m_CLCompileContext);
755 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
756 return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor, info, m_CLCompileContext);
760 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
761 return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
765 auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
766 return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext);
770 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
771 return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor, info, m_CLCompileContext);
775 auto transposeConvolution2dQueueDescriptor
776 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
777 return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
779 m_MemoryManager->GetIntraLayerManager(),
784 auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
785 return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,