ArmNN 25.11
ClWorkloadFactory.cpp
Go to the documentation of this file.
1//
2// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#include "ClWorkloadFactory.hpp"
6#include "ClBackendId.hpp"
7#include "ClBackendModelContext.hpp"
8#include "ClContextDeserializer.hpp"
9#include "ClContextSerializer.hpp"
10
11#include <Layer.hpp>
12
13#include <armnn/Exceptions.hpp>
14#include <armnn/Logging.hpp>
15#include <armnn/Utils.hpp>
16#include <armnn/utility/IgnoreUnused.hpp>
17#include <armnn/utility/NumericCast.hpp>
18#include <armnn/utility/PolymorphicDowncast.hpp>
19
24
25#include <cl/ClTensorHandle.hpp>
26#include <cl/workloads/ClWorkloads.hpp>
28
29#include <arm_compute/core/CL/CLKernelLibrary.h>
30#include <arm_compute/runtime/CL/CLBufferAllocator.h>
31#include <arm_compute/runtime/CL/CLScheduler.h>
32
33#include <armnnUtils/Filesystem.hpp>
34#include <fstream>
35
36#include <sys/stat.h>
37
38namespace armnn
39{
40
41namespace
42{
43static const BackendId s_Id{ClBackendId()};
44}
45
46bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
47 Optional<DataType> dataType,
48 std::string& outReasonIfUnsupported)
49{
50 return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
51}
52
53bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
54 Optional<DataType> dataType,
55 std::string& outReasonIfUnsupported,
56 const ModelOptions& modelOptions)
57{
58 return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
59}
60
61const BackendId& ClWorkloadFactory::GetBackendId() const
62{
63 return s_Id;
64}
65
66void ClWorkloadFactory::AfterWorkloadsCreated()
67{
68 if(m_ModelContextPtr)
69 {
70 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
71 if (modelOptions->SaveCachedNetwork())
72 {
73 ClContextSerializer serializer;
74 serializer.Serialize(m_CLCompileContext);
75 auto cachedFd = modelOptions->GetCachedFileDescriptor();
76 if (cachedFd != -1)
77 {
78 std::vector<uint8_t> compiledContextData;
79 std::stringstream stream;
80 bool serialized = serializer.SaveSerializedToStream(stream);
81 if (serialized)
82 {
83 std::string const serializedString{stream.str()};
84 std::copy(serializedString.begin(),
85 serializedString.end(),
86 std::back_inserter(compiledContextData));
87 auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size());
88 if (success == -1)
89 {
90 ARMNN_LOG(info) << "ClWorkloadFactory:: Could not cache the compiled context!";
91 }
92 }
93 }
94
95 // Save map to a filepath provided in ModelOptions
96 auto filePath = modelOptions->GetCachedNetworkFilePath();
97 if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
98 {
99 // Serialize ClContext to the file specified
100 std::ofstream file(filePath, std::ios::out | std::ios::binary);
101 serializer.SaveSerializedToStream(file);
102 }
103 }
104 }
105}
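// --- Illustrative note (not part of ClWorkloadFactory.cpp) ---------------------------------------
// AfterWorkloadsCreated() only serializes the CL compile context when the GpuAcc model options ask
// for it. A minimal sketch of how a caller might enable that path, assuming the option names parsed
// by ClBackendModelContext ("SaveCachedNetwork", "CachedNetworkFilePath", "CachedFileDescriptor")
// and a placeholder cache path:
//
//     armnn::BackendOptions clCacheOptions("GpuAcc",
//     {
//         { "SaveCachedNetwork", true },                     // triggers the serialization above
//         { "CachedNetworkFilePath", "/tmp/cl_cache.bin" }   // placeholder path for the cache file
//     });
//     // Add clCacheOptions to the optimizer/network options before Optimize()/LoadNetwork();
//     // AfterWorkloadsCreated() then writes m_CLCompileContext to the file (or file descriptor).
// -------------------------------------------------------------------------------------------------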
106
107template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
108std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
109 const WorkloadInfo& info,
110 Args&&... args)
111{
112 try
113 {
114 return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
115 }
116 catch (const cl::Error& clError)
117 {
118 throw WrapClError(clError, CHECK_LOCATION());
119 }
120}
121
122template <typename Workload, typename QueueDescriptorType, typename... Args>
123std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
124 const WorkloadInfo& info,
125 Args&&... args)
126{
127 try
128 {
129 return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
130 }
131 catch (const cl::Error& clError)
132 {
133 throw WrapClError(clError, CHECK_LOCATION());
134 }
135}
136
137void ClWorkloadFactory::InitializeCLCompileContext()
138{
139 // Initialize our m_CLCompileContext using default device and context
140 auto context = arm_compute::CLKernelLibrary::get().context();
141 auto device = arm_compute::CLKernelLibrary::get().get_device();
142 m_CLCompileContext = arm_compute::CLCompileContext(context, device);
143
144 if (m_ModelContextPtr)
145 {
146 // Load saved programs if the user has set a filepath
147 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
148 auto filePath = modelOptions->GetCachedNetworkFilePath();
149 if (!(modelOptions->SaveCachedNetwork()))
150 {
151 ClContextDeserializer deserializer;
152 auto cachedFd = modelOptions->GetCachedFileDescriptor();
153 if (cachedFd != -1)
154 {
155 struct stat statBuffer;
156 if (fstat(cachedFd, &statBuffer) == 0)
157 {
158 long dataSize = static_cast<long>(statBuffer.st_size);
159 if( dataSize > 0)
160 {
161 auto offset = lseek(cachedFd, 0, SEEK_CUR);
162 if (offset == 0)
163 {
164 std::vector <uint8_t> compiledContextData(static_cast<unsigned int>(dataSize));
165 auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0);
166 if (success != -1)
167 {
168 deserializer.DeserializeFromBinary(m_CLCompileContext,
169 context,
170 device,
171 compiledContextData);
172 }
173 }
174 }
175
176 }
177 }
178
179 if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
180 {
181 // Deserialize binary file and load into m_CLCompileContext
182 deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
183 }
184 }
185 }
186}
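// --- Illustrative note (not part of ClWorkloadFactory.cpp) ---------------------------------------
// InitializeCLCompileContext() is the read-side mirror of AfterWorkloadsCreated(): when
// SaveCachedNetwork is false it tries to restore previously built OpenCL programs, either from
// "CachedFileDescriptor" (the descriptor must still be positioned at offset 0, hence the
// lseek(fd, 0, SEEK_CUR) guard before pread()) or from "CachedNetworkFilePath" if that file exists.
// Loading from the cache lets subsequent runs skip recompiling the CL kernels.
// -------------------------------------------------------------------------------------------------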
187
188ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
189 : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
190{
191 InitializeCLCompileContext();
192}
193
194ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
195 const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
196 : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
197{
198 InitializeCLCompileContext();
199}
200
201std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
202 const bool IsMemoryManaged) const
203{
204 IgnoreUnused(IsMemoryManaged);
205 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
206 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
207
208 return tensorHandle;
209}
210
211std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
212 DataLayout dataLayout,
213 const bool IsMemoryManaged) const
214{
215 IgnoreUnused(IsMemoryManaged);
216 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
217 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
218
219 return tensorHandle;
220}
221
222std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
223 TensorShape const& subTensorShape,
224 unsigned int const* subTensorOrigin) const
225{
226 arm_compute::Coordinates coords;
227 arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
228
229 coords.set_num_dimensions(subTensorShape.GetNumDimensions());
230 for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
231 {
232 // Arm compute indexes tensor coords in reverse order.
233 unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
234 coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
235 }
236
237 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
238 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
239 {
240 return nullptr;
241 }
242
243 return std::make_unique<ClSubTensorHandle>(
244 PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
245}
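// --- Illustrative note (not part of ClWorkloadFactory.cpp) ---------------------------------------
// Arm Compute Library orders tensor coordinates fastest-iterating dimension first, so the loop
// above writes the sub-tensor origin in reverse: coords[i] = subTensorOrigin[NumDims - 1 - i].
// Worked example for a 4D NCHW parent with origin { n = 0, c = 2, h = 0, w = 0 }: the resulting
// ACL coordinates are (w, h, c, n) = (0, 0, 2, 0).
// -------------------------------------------------------------------------------------------------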
246
247std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
248 const QueueDescriptor& descriptor,
249 const WorkloadInfo& info) const
250{
251 switch(type)
252 {
253 case LayerType::Activation :
254 {
255 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
256 return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor, info, m_CLCompileContext);
257 }
258 case LayerType::Addition :
259 {
260 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
261 return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor, info, m_CLCompileContext);
262 }
263 case LayerType::ArgMinMax :
264 {
265 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
266 return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext);
267 }
268 case LayerType::BatchMatMul :
269 {
270 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
271 return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext);
272 }
273 case LayerType::BatchNormalization :
274 {
275 auto batchNormalizationQueueDescriptor
276 = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
277 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
278 (*batchNormalizationQueueDescriptor, info, m_CLCompileContext);
279 }
280 case LayerType::BatchToSpaceNd :
281 {
282 auto batchToSpaceNdQueueDescriptor
283 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
284 return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info, m_CLCompileContext);
285 }
286 case LayerType::Cast :
287 {
288 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
289 return MakeWorkload<ClCastWorkload>(*castQueueDescriptor, info, m_CLCompileContext);
290 }
291 case LayerType::ChannelShuffle :
292 {
293 auto channelShuffleQueueDescriptor
294 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
295 return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info, m_CLCompileContext);
296 }
297 case LayerType::Comparison :
298 {
299 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
300 return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor, info, m_CLCompileContext);
301 }
302 case LayerType::Concat :
303 {
304 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
305 return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor, info, m_CLCompileContext);
306 }
307 case LayerType::Constant :
308 {
309 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
310 return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor, info, m_CLCompileContext);
311 }
312 case LayerType::ConvertFp16ToFp32 :
313 {
314 auto convertFp16ToFp32QueueDescriptor
315 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
316 return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
317 info,
318 m_CLCompileContext);
319 }
320 case LayerType::ConvertFp32ToFp16 :
321 {
322 auto convertFp32ToFp16QueueDescriptor
323 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
324 return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
325 info,
326 m_CLCompileContext);
327 }
328 case LayerType::Convolution2d :
329 {
330 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
331 bool isFastMathEnabled = false;
332 if (m_ModelContextPtr)
333 {
334 if (m_ModelContextPtr.get() != nullptr)
335 {
336 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
337 if (modelOptions)
338 {
339 isFastMathEnabled = modelOptions->IsFastMathEnabled();
340 }
341 }
342 }
343 return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
344 info,
345 m_MemoryManager->GetIntraLayerManager(),
346 m_CLCompileContext,
347 isFastMathEnabled);
348 }
349 case LayerType::Convolution3d :
350 {
351 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
352 bool isFastMathEnabled = false;
353 if (m_ModelContextPtr)
354 {
355 if (m_ModelContextPtr.get() != nullptr)
356 {
357 auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
358 if (modelOptions)
359 {
360 isFastMathEnabled = modelOptions->IsFastMathEnabled();
361 }
362 }
363 }
364 return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
365 info,
366 m_MemoryManager->GetIntraLayerManager(),
367 m_CLCompileContext,
368 isFastMathEnabled);
369 }
370 case LayerType::Debug :
371 {
372 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
373 return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor, info, m_CLCompileContext);
374 }
375 case LayerType::DepthToSpace :
376 {
377 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
378 return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info, m_CLCompileContext);
379 }
380 case LayerType::DepthwiseConvolution2d :
381 {
382 auto depthwiseConvolution2dQueueDescriptor
383 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
384 return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
385 info,
386 m_CLCompileContext);
387 }
388 case LayerType::Dequantize :
389 {
390 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
391 return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor, info, m_CLCompileContext);
392 }
393 case LayerType::DetectionPostProcess :
394 {
395 auto detectionPostProcessQueueDescriptor
396 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
397 return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
398 info,
399 m_CLCompileContext);
400 }
401 case LayerType::Division :
402 {
403 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
404 return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
405 }
406 case LayerType::ElementwiseBinary :
407 {
408 auto elementwiseBinaryQueueDescriptor
409 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
410 switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
411 {
412 case BinaryOperation::Add:
413 {
414 AdditionQueueDescriptor additionQueueDescriptor;
415 additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
416 additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
417 additionQueueDescriptor.m_AdditionalInfoObject =
418 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
419 return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
420 }
421 case BinaryOperation::Div:
422 {
423 DivisionQueueDescriptor divisionQueueDescriptor;
424 divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
425 divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
426 divisionQueueDescriptor.m_AdditionalInfoObject =
427 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
428 return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
429 }
430 case BinaryOperation::FloorDiv:
431 {
432 DivisionQueueDescriptor divisionQueueDescriptor;
433 divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
434 divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
435 divisionQueueDescriptor.m_AdditionalInfoObject =
436 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
437 return std::make_unique<ClFloorDivWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
438 }
439 case BinaryOperation::Maximum:
440 {
441 MaximumQueueDescriptor maximumQueueDescriptor;
442 maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
443 maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
444 maximumQueueDescriptor.m_AdditionalInfoObject =
445 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
446 return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
447 }
448 case BinaryOperation::Minimum:
449 {
450 MinimumQueueDescriptor minimumQueueDescriptor;
451 minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
452 minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
453 minimumQueueDescriptor.m_AdditionalInfoObject =
454 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
455 return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
456 }
457 case BinaryOperation::Mul:
458 {
459 MultiplicationQueueDescriptor multiplicationQueueDescriptor;
460 multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
461 multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
462 multiplicationQueueDescriptor.m_AdditionalInfoObject =
463 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
464 return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
465 info,
466 m_CLCompileContext);
467 }
468 case BinaryOperation::Power:
469 case BinaryOperation::SqDiff:
470 {
471 return std::make_unique<ClElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor,
472 info,
473 m_CLCompileContext);
474 }
475 case BinaryOperation::Sub:
476 {
477 SubtractionQueueDescriptor subtractionQueueDescriptor;
478 subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
479 subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
480 subtractionQueueDescriptor.m_AdditionalInfoObject =
481 elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
482 return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
483 info,
484 m_CLCompileContext);
485 }
486 default:
487 return nullptr;
488 }
489 }
490 case LayerType::ElementwiseUnary :
491 {
492 auto elementwiseUnaryQueueDescriptor
493 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
494 switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
495 {
496 case UnaryOperation::Abs:
497 {
498 AbsQueueDescriptor absQueueDescriptor;
499 absQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
500 absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
501 return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
502 }
503 case UnaryOperation::Exp:
504 return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
505 case UnaryOperation::Log:
506 return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
507 case UnaryOperation::LogicalNot:
508 return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
509 info,
510 m_CLCompileContext);
511 case UnaryOperation::Neg:
512 return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
513 case UnaryOperation::Rsqrt:
514 {
515 RsqrtQueueDescriptor rsqrtQueueDescriptor;
516 rsqrtQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
517 rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
518 return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
519 }
520 case UnaryOperation::Sin:
521 return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
522 case UnaryOperation::Sqrt:
523 return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
524 default:
525 return nullptr;
526 }
527 }
528 case LayerType::Fill :
529 {
530 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
531 return std::make_unique<ClFillWorkload>(*fillQueueDescriptor, info, m_CLCompileContext);
532 }
533 case LayerType::Floor :
534 {
535 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
536 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info, m_CLCompileContext);
537 }
538 case LayerType::FullyConnected :
539 {
540 auto fullyConnectedQueueDescriptor
541 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
542 return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
543 info,
544 m_MemoryManager->GetIntraLayerManager(),
545 m_CLCompileContext);
546 }
547 case LayerType::Gather :
548 {
549 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
550 return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor, info, m_CLCompileContext);
551 }
552 case LayerType::GatherNd :
553 {
554 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
555 return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor, info, m_CLCompileContext);
556 }
557 case LayerType::Input :
558 {
559 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
560 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
561 }
562 case LayerType::InstanceNormalization :
563 {
564 auto instanceNormalizationQueueDescriptor
565 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
566 return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
567 info,
568 m_CLCompileContext);
569 }
570 case LayerType::L2Normalization :
571 {
572 auto l2NormalizationQueueDescriptor
573 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
574 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
575 info,
576 m_CLCompileContext);
577 }
578 case LayerType::LogicalBinary :
579 {
580 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
581 switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
582 {
583 case LogicalBinaryOperation::LogicalAnd:
584 return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
585 info,
586 m_CLCompileContext);
587 case LogicalBinaryOperation::LogicalOr:
588 return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
589 info,
590 m_CLCompileContext);
591 default:
592 return nullptr;
593 }
594 }
595 case LayerType::LogSoftmax :
596 {
597 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
598 return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
599 info,
600 m_MemoryManager->GetIntraLayerManager(),
601 m_CLCompileContext);
602 }
603 case LayerType::Lstm :
604 {
605 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
606 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info, m_CLCompileContext);
607 }
608 case LayerType::Maximum :
609 {
610 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
611 return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor, info, m_CLCompileContext);
612 }
613 case LayerType::Mean :
614 {
615 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
616 return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor, info, m_CLCompileContext);
617 }
618 case LayerType::MemCopy :
619 {
620 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
621 if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
622 {
623 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
624 }
625 return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
626 }
627 case LayerType::MemImport :
628 {
629 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
630 if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
631 {
632 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
633 }
634 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
635 }
636 case LayerType::Minimum :
637 {
638 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
639 return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor, info, m_CLCompileContext);
640 }
641 case LayerType::Multiplication :
642 {
643 auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
644 return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor, info, m_CLCompileContext);
645 }
646 case LayerType::Normalization :
647 {
648 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
649 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
650 info,
651 m_CLCompileContext);
652 }
653 case LayerType::Output :
654 {
655 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
656 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
657 }
658 case LayerType::Pad :
659 {
660 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
661 return MakeWorkload<ClPadWorkload>(*padQueueDescriptor, info, m_CLCompileContext);
662 }
663 case LayerType::Permute :
664 {
665 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
666 return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor, info, m_CLCompileContext);
667 }
668 case LayerType::Pooling2d :
669 {
670 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
671 return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor, info, m_CLCompileContext);
672 }
673 case LayerType::Pooling3d :
674 {
675 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
676 return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor, info, m_CLCompileContext);
677 }
678 case LayerType::PreCompiled :
679 {
680 auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
681 return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info, m_CLCompileContext);
682 }
683 case LayerType::Prelu :
684 {
685 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
686 return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor, info, m_CLCompileContext);
687 }
688 case LayerType::QLstm :
689 {
690 auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
691 return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor, info, m_CLCompileContext);
692 }
693 case LayerType::Quantize :
694 {
695 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
696 return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor, info, m_CLCompileContext);
697 }
698 case LayerType::QuantizedLstm :
699 {
700 auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
701 return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info, m_CLCompileContext);
702 }
703 case LayerType::Rank :
704 {
705 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
706 return std::make_unique<ClRankWorkload>(*rankQueueDescriptor, info);
707 }
708 case LayerType::Reduce :
709 {
710 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
711 return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor, info);
712 }
713 case LayerType::Reshape :
714 {
715 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
716 return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor, info, m_CLCompileContext);
717 }
718 case LayerType::Resize :
719 {
720 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
721 return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor, info, m_CLCompileContext);
722 }
723 case LayerType::ReverseV2 :
724 {
725 auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
726 return MakeWorkload<ClReverseV2Workload>(*reverseV2QueueDescriptor, info, m_CLCompileContext);
727 }
728 case LayerType::ScatterNd :
729 {
730 auto scatterNdQueueDescriptor = PolymorphicDowncast<const ScatterNdQueueDescriptor*>(&descriptor);
731 return MakeWorkload<ClScatterNdWorkload>(*scatterNdQueueDescriptor, info, m_CLCompileContext);
732 }
733 case LayerType::Slice :
734 {
735 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
736 return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor, info, m_CLCompileContext);
737 }
738 case LayerType::Softmax :
739 {
740 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
741 return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
742 info,
743 m_MemoryManager->GetIntraLayerManager(),
744 m_CLCompileContext);
745 }
746 case LayerType::SpaceToBatchNd :
747 {
748 auto spaceToBatchNdQueueDescriptor
749 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
750 return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info, m_CLCompileContext);
751 }
752 case LayerType::SpaceToDepth :
753 {
754 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
755 return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info, m_CLCompileContext);
756 }
757 case LayerType::Splitter :
758 {
759 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
760 return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor, info, m_CLCompileContext);
761 }
762 case LayerType::Stack :
763 {
764 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
765 return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor, info, m_CLCompileContext);
766 }
767 case LayerType::StridedSlice :
768 {
769 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
770 return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor, info, m_CLCompileContext);
771 }
772 case LayerType::Subtraction :
773 {
774 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
775 return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
776 }
777 case LayerType::Tile:
778 {
779 auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
780 return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext);
781 }
782 case LayerType::Transpose :
783 {
784 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
785 return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor, info, m_CLCompileContext);
786 }
787 case LayerType::TransposeConvolution2d :
788 {
789 auto transposeConvolution2dQueueDescriptor
790 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
791 return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
792 info,
793 m_MemoryManager->GetIntraLayerManager(),
794 m_CLCompileContext);
795 }
796 case LayerType::UnidirectionalSequenceLstm :
797 {
798 auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
799 return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,
800 info,
801 m_CLCompileContext);
802 }
803 default:
804 return nullptr;
805 }
806}
807
808
809
810} // namespace armnn
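The listing above is normally exercised indirectly through the public runtime API rather than by constructing a ClWorkloadFactory by hand: LoadNetwork() asks the GpuAcc backend for a workload factory and then calls CreateTensorHandle() and CreateWorkload() for every layer assigned to it. The sketch below shows one plausible way to drive that path end to end. It is illustrative only: the tiny input->ReLU->output network, the cache path, and the use of OptimizerOptionsOpaque::AddModelOption are assumptions about typical usage, not something taken from this file.

// Illustrative sketch (not part of ClWorkloadFactory.cpp): drive the GpuAcc backend so that
// ClWorkloadFactory::CreateTensorHandle()/CreateWorkload() run during LoadNetwork().
#include <armnn/ArmNN.hpp>
#include <utility>
#include <vector>

int main()
{
    using namespace armnn;

    // Build a trivial network: input -> ReLU -> output.
    INetworkPtr network = INetwork::Create();
    IConnectableLayer* input  = network->AddInputLayer(0);
    ActivationDescriptor reluDesc;
    reluDesc.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* relu   = network->AddActivationLayer(reluDesc, "relu");
    IConnectableLayer* output = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
    relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo tensorInfo({ 1, 8 }, DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    relu->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // GpuAcc model options understood by ClBackendModelContext; the cache path is a placeholder.
    BackendOptions clOptions("GpuAcc",
    {
        { "FastMathEnabled", true },
        { "SaveCachedNetwork", true },
        { "CachedNetworkFilePath", "/tmp/cl_cache.bin" }
    });

    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    OptimizerOptionsOpaque optimizerOptions;
    optimizerOptions.AddModelOption(clOptions);

    std::vector<BackendId> backends = { Compute::GpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optimizerOptions);

    // LoadNetwork() is where the factory creates tensor handles and workloads for each layer,
    // and where AfterWorkloadsCreated() serializes the CL compile context (SaveCachedNetwork above).
    NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
    return 0;
}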